diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,128002 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 3999, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1177.9375, + "completions/mean_terminated_length": 1177.9375, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.00025006251562890725, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.189586777612287, + "kl": 0.0005757808685302734, + "learning_rate": 0.0, + "loss": 0.0092, + "num_tokens": 44999.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7770684957504272, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016377859323407972, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06091538968768832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572018, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1251.5, + "completions/mean_terminated_length": 1216.0, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.0005001250312578145, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.083163453141004, + "kl": 0.00225830078125, + "learning_rate": 2.5e-09, + "loss": 0.0078, + "num_tokens": 96863.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0469045639038086, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 1.2679006115772634e-05, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18120153989509685, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1152.25, + "completions/mean_terminated_length": 1129.0667724609375, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.0007501875468867217, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8339165001504045, + "kl": 0.0015735626220703125, + "learning_rate": 5e-09, + "loss": -0.0172, + "num_tokens": 131811.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9432728290557861, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06667226330308461, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054408860091332695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1293.0, + "completions/max_terminated_length": 1293.0, + "completions/mean_length": 897.25, + "completions/mean_terminated_length": 897.25, + "completions/min_length": 466.0, + "completions/min_terminated_length": 466.0, + "epoch": 0.001000250062515629, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7660396792315827, + "kl": 0.0012102127075195312, + "learning_rate": 7.5e-09, + "loss": -0.0081, + "num_tokens": 164343.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.49418342113494873, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029200372283519384, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046912860411538776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575911, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 4 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 984.5, + "completions/mean_terminated_length": 950.1333618164062, + "completions/min_length": 502.0, + "completions/min_terminated_length": 502.0, + "epoch": 0.001250312578144536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2643225767107054, + "kl": 0.00135040283203125, + "learning_rate": 1e-08, + "loss": 0.0035, + "num_tokens": 194559.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.25354495644569397, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08080288903833875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10704281164984458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 5 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1169.4375, + "completions/mean_terminated_length": 1147.4000244140625, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.0015003750937734434, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.535672315292492, + "kl": 0.00272369384765625, + "learning_rate": 1.25e-08, + "loss": -0.0397, + "num_tokens": 247582.0, + "reward": 0.0, + "reward_std": 1.018636703491211, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02070088664483012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07946908904361012, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 6 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1264.25, + "completions/mean_terminated_length": 1248.533447265625, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.0017504376094023505, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.229327731917989, + "kl": 0.0009708404541015625, + "learning_rate": 1.5e-08, + "loss": -0.0096, + "num_tokens": 291338.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.01985764503479, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06470065520476316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07210768776160932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 7 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1266.25, + "completions/mean_terminated_length": 1250.666748046875, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.002000500125031258, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3021659122871285, + "kl": 0.0025177001953125, + "learning_rate": 1.75e-08, + "loss": -0.0351, + "num_tokens": 346390.0, + "reward": 0.0, + "reward_std": 0.7840641736984253, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2227762651927374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13734763864692226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05000000000000001, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 8 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1241.0, + "completions/mean_terminated_length": 1223.7333984375, + "completions/min_length": 1070.0, + "completions/min_terminated_length": 1070.0, + "epoch": 0.002250562640660165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.29347914476156, + "kl": 0.002597808837890625, + "learning_rate": 2e-08, + "loss": 0.0006, + "num_tokens": 397918.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0490429401397705, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05210811113388129, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11209735241076955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 9 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1002.0, + "completions/mean_length": 1215.875, + "completions/mean_terminated_length": 931.75, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.002500625156289072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.021833088959922, + "kl": 0.0016021728515625, + "learning_rate": 2.25e-08, + "loss": 0.0532, + "num_tokens": 440404.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8550092577934265, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021980892938893715, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.067824815503059, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 10 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1184.0, + "completions/max_terminated_length": 1184.0, + "completions/mean_length": 978.25, + "completions/mean_terminated_length": 978.25, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.0027506876719179795, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4304013633797, + "kl": 0.0019359588623046875, + "learning_rate": 2.5e-08, + "loss": 0.0283, + "num_tokens": 483656.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0537045001983643, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0028288011735883942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056246889576325655, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 11 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1251.8125, + "completions/mean_terminated_length": 1194.5384521484375, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.003000750187546887, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.960493761448213, + "kl": 0.002056121826171875, + "learning_rate": 2.7499999999999998e-08, + "loss": -0.0039, + "num_tokens": 526613.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0185887813568115, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07926164231200171, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1502458430854509, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 12 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1154.1875, + "completions/mean_terminated_length": 1038.916748046875, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.003250812703175794, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.199018227648173, + "kl": 0.00200653076171875, + "learning_rate": 3e-08, + "loss": -0.0179, + "num_tokens": 572040.0, + "reward": 0.0, + "reward_std": 1.017466425895691, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05300794944968118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05904593700248093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952499, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 13 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1363.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 933.5, + "completions/mean_terminated_length": 933.5, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.003500875218804701, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6047477537485904, + "kl": 0.00188446044921875, + "learning_rate": 3.25e-08, + "loss": -0.1087, + "num_tokens": 600008.0, + "reward": 0.0, + "reward_std": 0.9282970428466797, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004477472724929523, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.017909890899718093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 14 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1117.4375, + "completions/mean_terminated_length": 1117.4375, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.0037509377344336083, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2076445501941024, + "kl": 0.0023345947265625, + "learning_rate": 3.5e-08, + "loss": 0.0127, + "num_tokens": 652327.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9863706827163696, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08184099718629695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09726239043427391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 15 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 926.5625, + "completions/mean_terminated_length": 888.3333740234375, + "completions/min_length": 453.0, + "completions/min_terminated_length": 453.0, + "epoch": 0.004001000250062516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6344808613721686, + "kl": 0.0022754669189453125, + "learning_rate": 3.75e-08, + "loss": -0.0739, + "num_tokens": 689416.0, + "reward": 0.0, + "reward_std": 0.7991452217102051, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06284915195738497, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1482161864953295, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 16 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1102.0625, + "completions/mean_terminated_length": 1102.0625, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.0042510627656914225, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3889032037725033, + "kl": 0.002361297607421875, + "learning_rate": 4e-08, + "loss": -0.0202, + "num_tokens": 732753.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9978504180908203, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04621088355041644, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.077711423743284, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 17 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1094.625, + "completions/mean_terminated_length": 1094.625, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.00450112528132033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.60894813818436, + "kl": 0.0026397705078125, + "learning_rate": 4.2500000000000003e-08, + "loss": -0.0284, + "num_tokens": 781651.0, + "reward": 0.0, + "reward_std": 0.9224053025245667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10785916141870436, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044720613455838075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 18 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1091.0, + "completions/mean_terminated_length": 1091.0, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.004751187796949237, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1691192427387485, + "kl": 0.0018768310546875, + "learning_rate": 4.5e-08, + "loss": -0.0002, + "num_tokens": 822035.0, + "reward": 0.0, + "reward_std": 0.29055729508399963, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16392447531363805, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08008285641059446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 19 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1111.0, + "completions/mean_terminated_length": 1055.4285888671875, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.005001250312578144, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.457974870741892, + "kl": 0.0009918212890625, + "learning_rate": 4.7499999999999995e-08, + "loss": -0.0918, + "num_tokens": 860995.0, + "reward": 0.0, + "reward_std": 0.5498136878013611, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011560321799926365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05808429257166532, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14950535726806533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 20 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1154.625, + "completions/mean_terminated_length": 1131.60009765625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.005251312828207052, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.794082365969026, + "kl": 0.001659393310546875, + "learning_rate": 5e-08, + "loss": -0.0682, + "num_tokens": 907141.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7616308927536011, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1492956476149084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10496726702669136, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 21 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1265.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 983.6875, + "completions/mean_terminated_length": 983.6875, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.005501375343835959, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.723801302250852, + "kl": 0.0022430419921875, + "learning_rate": 5.2499999999999994e-08, + "loss": 0.0003, + "num_tokens": 947728.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9178466796875, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0013219398370949778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05691104063625746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 22 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1356.4375, + "completions/mean_terminated_length": 1270.300048828125, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.005751437859464866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2865514217101457, + "kl": 0.0013141632080078125, + "learning_rate": 5.4999999999999996e-08, + "loss": -0.0056, + "num_tokens": 1003959.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7916842103004456, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.35706856240916407, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11458171347077557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 23 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1248.5625, + "completions/mean_terminated_length": 1190.5384521484375, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.006001500375093774, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.695678555222019, + "kl": 0.0009126663208007812, + "learning_rate": 5.75e-08, + "loss": 0.013, + "num_tokens": 1048504.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6444647908210754, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11623738697823527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1571112253850851, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 24 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1467.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1168.1875, + "completions/mean_terminated_length": 1168.1875, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.006251562890722681, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8005935431701214, + "kl": 0.00262451171875, + "learning_rate": 6e-08, + "loss": 0.004, + "num_tokens": 1092019.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9862891435623169, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03158628363086885, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08805432971412637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 25 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1092.25, + "completions/mean_terminated_length": 1092.25, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.006501625406351588, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4677320750829745, + "kl": 0.00037539005279541016, + "learning_rate": 6.25e-08, + "loss": -0.0262, + "num_tokens": 1126799.0, + "reward": 0.0, + "reward_std": 1.0439221858978271, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0859231870135623, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054619093897018946, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 26 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1448.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1036.25, + "completions/mean_terminated_length": 1036.25, + "completions/min_length": 606.0, + "completions/min_terminated_length": 606.0, + "epoch": 0.006751687921980495, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5043522108326215, + "kl": 0.002655029296875, + "learning_rate": 6.5e-08, + "loss": -0.0064, + "num_tokens": 1178187.0, + "reward": 0.0, + "reward_std": 0.9238001704216003, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09218029086642417, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13548732107043227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 27 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1088.6875, + "completions/mean_terminated_length": 1061.2667236328125, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.007001750437609402, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7327871417568406, + "kl": 0.0013484954833984375, + "learning_rate": 6.75e-08, + "loss": 0.0202, + "num_tokens": 1223846.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7984364032745361, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05118103541487361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04865159816657668, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 28 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1163.0, + "completions/max_terminated_length": 1163.0, + "completions/mean_length": 939.9375, + "completions/mean_terminated_length": 939.9375, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.007251812953238309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6352929212559353, + "kl": 0.0024566650390625, + "learning_rate": 7e-08, + "loss": 0.0147, + "num_tokens": 1270485.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8627786636352539, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06872618730103291, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05688570049773068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 29 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1133.1875, + "completions/mean_terminated_length": 1080.7857666015625, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.007501875468867217, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6286769222693698, + "kl": 0.002471923828125, + "learning_rate": 7.25e-08, + "loss": 0.0598, + "num_tokens": 1323304.0, + "reward": 0.0, + "reward_std": 0.5249185562133789, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03254148488292387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11197774588364875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.20940833758915106, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 30 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 1500.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 1500.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.007751937984496124, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4802304744308945, + "kl": 0.0005712509155273438, + "learning_rate": 7.5e-08, + "loss": 0.0, + "num_tokens": 1392456.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8629792928695679, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.33160284405983426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.36880671982582824, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 31 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1086.75, + "completions/mean_terminated_length": 1086.75, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.008002000500125032, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8324610902897125, + "kl": 0.00131988525390625, + "learning_rate": 7.75e-08, + "loss": -0.0657, + "num_tokens": 1445532.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8534313440322876, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03807879340945452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023230611310410088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 32 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1085.0625, + "completions/mean_terminated_length": 1057.4000244140625, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.008252063015753939, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2339924107340385, + "kl": 0.002376556396484375, + "learning_rate": 8e-08, + "loss": -0.0168, + "num_tokens": 1485861.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8873122930526733, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030865613761115622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02349589175913767, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16843506277010845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 33 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1030.375, + "completions/mean_terminated_length": 922.0000610351562, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.008502125531382845, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.700465369595541, + "kl": 0.00131988525390625, + "learning_rate": 8.25e-08, + "loss": -0.0386, + "num_tokens": 1532835.0, + "reward": 0.0, + "reward_std": 0.7919942736625671, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.156673006288489, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16988177983612945, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 34 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1227.375, + "completions/mean_terminated_length": 1063.800048828125, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.008752188047011753, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1310388682827877, + "kl": 0.002460479736328125, + "learning_rate": 8.500000000000001e-08, + "loss": 0.0135, + "num_tokens": 1586521.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9543017148971558, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06433234977570508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07394505228389964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 35 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1026.0625, + "completions/mean_terminated_length": 994.4667358398438, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.00900225056264066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.822771479259306, + "kl": 0.0014171600341796875, + "learning_rate": 8.75e-08, + "loss": 0.0241, + "num_tokens": 1641746.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.032823920249939, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009582741844079052, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0786878748077041, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 36 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1242.625, + "completions/mean_terminated_length": 1205.857177734375, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.009252313078269568, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7102644805242, + "kl": 0.0014777183532714844, + "learning_rate": 9e-08, + "loss": -0.0727, + "num_tokens": 1694116.0, + "reward": 0.0, + "reward_std": 0.732426643371582, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040917228757120186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.083098123249028, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 37 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1254.75, + "completions/mean_terminated_length": 1143.272705078125, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.009502375593898474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2785295758519806, + "kl": 0.002132415771484375, + "learning_rate": 9.25e-08, + "loss": -0.0009, + "num_tokens": 1741704.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.3047698140144348, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06013973047575619, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11580498985244345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.2135675797285513, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 38 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1106.5625, + "completions/mean_terminated_length": 975.4166870117188, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.009752438109527382, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.662352224490162, + "kl": 0.0011415481567382812, + "learning_rate": 9.499999999999999e-08, + "loss": 0.0226, + "num_tokens": 1784137.0, + "reward": -5.587935447692871e-09, + "reward_std": 0.9097418785095215, + "rewards/wordcountpos_reward_GEOBench/mean": -5.587935447692871e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00793885200665826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04101074421500302, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 39 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1277.6875, + "completions/mean_terminated_length": 1226.3846435546875, + "completions/min_length": 1063.0, + "completions/min_terminated_length": 1063.0, + "epoch": 0.010002500625156289, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.926945902838626, + "kl": 0.001800537109375, + "learning_rate": 9.749999999999999e-08, + "loss": 0.0068, + "num_tokens": 1830476.0, + "reward": 0.0, + "reward_std": 0.9452263116836548, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021940180322990964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03827811494577865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 40 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1041.0625, + "completions/mean_terminated_length": 1041.0625, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.010252563140785197, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.729093220462787, + "kl": 0.0022869110107421875, + "learning_rate": 1e-07, + "loss": -0.0299, + "num_tokens": 1878221.0, + "reward": 0.0, + "reward_std": 0.8691483736038208, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011521479292543093, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03958917370863923, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 41 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1268.1875, + "completions/mean_terminated_length": 1190.916748046875, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.010502625656414103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9006842779247717, + "kl": 0.00203704833984375, + "learning_rate": 1.0249999999999998e-07, + "loss": -0.0011, + "num_tokens": 1924848.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0137510299682617, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04827326742286115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11946178408637198, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 42 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1153.0, + "completions/max_terminated_length": 1153.0, + "completions/mean_length": 919.8125, + "completions/mean_terminated_length": 919.8125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.010752688172043012, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.7507850486443338, + "kl": 7.030367851257324e-05, + "learning_rate": 1.0499999999999999e-07, + "loss": 0.0076, + "num_tokens": 1968005.0, + "reward": 0.0, + "reward_std": 0.6309376358985901, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005098874482083456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054474360469544784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1258305739211792, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 43 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1100.5, + "completions/mean_terminated_length": 1100.5, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.011002750687671918, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6574700664832895, + "kl": 0.002552032470703125, + "learning_rate": 1.0749999999999999e-07, + "loss": -0.0366, + "num_tokens": 2012045.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0180362462997437, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05144156099585784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0578126131603115, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 44 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1159.9375, + "completions/mean_terminated_length": 895.4444580078125, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.011252813203300824, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1912360583951, + "kl": 0.0007867813110351562, + "learning_rate": 1.0999999999999999e-07, + "loss": -0.1008, + "num_tokens": 2052764.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8970330953598022, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014303503917448859, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07644480991224792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.2220693918909466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 45 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1166.0, + "completions/max_terminated_length": 1166.0, + "completions/mean_length": 1039.125, + "completions/mean_terminated_length": 1039.125, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.011502875718929733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8570968389097002, + "kl": 0.002849578857421875, + "learning_rate": 1.125e-07, + "loss": -0.0027, + "num_tokens": 2102214.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8281359672546387, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1095409442528937, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1645376736938151, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256064, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 46 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1078.1875, + "completions/mean_terminated_length": 1050.0667724609375, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.011752938234558639, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0302838395790226, + "kl": 0.002025604248046875, + "learning_rate": 1.15e-07, + "loss": -0.0742, + "num_tokens": 2153881.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0420688390731812, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0010787076560995236, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09191875636932204, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 47 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1157.75, + "completions/mean_terminated_length": 1157.75, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.012003000750187547, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0155268802193764, + "kl": 0.0023651123046875, + "learning_rate": 1.1749999999999999e-07, + "loss": -0.0125, + "num_tokens": 2213077.0, + "reward": -5.587935447692871e-09, + "reward_std": 1.0057098865509033, + "rewards/wordcountpos_reward_GEOBench/mean": -5.587935447692871e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10227241261857678, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18998424049117243, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 48 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1091.0, + "completions/max_terminated_length": 1091.0, + "completions/mean_length": 1033.4375, + "completions/mean_terminated_length": 1033.4375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.012253063265816454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9926013833257805, + "kl": 0.001644134521484375, + "learning_rate": 1.2e-07, + "loss": 0.0037, + "num_tokens": 2251972.0, + "reward": 0.0, + "reward_std": 0.8031637072563171, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06472839374053614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10184616870874212, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386659, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 49 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1036.4375, + "completions/mean_terminated_length": 1036.4375, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.012503125781445362, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.395865141531731, + "kl": 0.002544403076171875, + "learning_rate": 1.225e-07, + "loss": -0.0026, + "num_tokens": 2286923.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.35296452045440674, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021715612747779355, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1289984386961949, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 50 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1132.0, + "completions/max_terminated_length": 1132.0, + "completions/mean_length": 886.4375, + "completions/mean_terminated_length": 886.4375, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.012753188297074268, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.740942026226683, + "kl": 0.00202178955078125, + "learning_rate": 1.25e-07, + "loss": 0.0031, + "num_tokens": 2333178.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8394395112991333, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05266528727920731, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06932423561592163, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 51 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1111.875, + "completions/mean_terminated_length": 1086.0, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.013003250812703177, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0517068830745484, + "kl": 0.001956939697265625, + "learning_rate": 1.275e-07, + "loss": -0.0251, + "num_tokens": 2371768.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9202165603637695, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02844132822126665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034094225332252856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195009, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 52 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1205.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 947.25, + "completions/mean_terminated_length": 947.25, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.013253313328332083, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.2372401474945989, + "kl": 0.0004727691411972046, + "learning_rate": 1.3e-07, + "loss": 0.0447, + "num_tokens": 2405772.0, + "reward": 0.0, + "reward_std": 0.7103041410446167, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0240213095274885, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06979942765210134, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 53 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 1213.6875, + "completions/mean_terminated_length": 1083.5455322265625, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.01350337584396099, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0066970807273607, + "kl": 0.0017719268798828125, + "learning_rate": 1.325e-07, + "loss": 0.0448, + "num_tokens": 2458087.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6927663087844849, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01467156196325391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1027762376603128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 54 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 934.0, + "completions/max_terminated_length": 934.0, + "completions/mean_length": 822.9375, + "completions/mean_terminated_length": 822.9375, + "completions/min_length": 673.0, + "completions/min_terminated_length": 673.0, + "epoch": 0.013753438359589898, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.479759404349975, + "kl": 0.001560211181640625, + "learning_rate": 1.35e-07, + "loss": 0.0019, + "num_tokens": 2499990.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8969330787658691, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08743978706337867, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10785832734313104, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 55 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1070.8125, + "completions/mean_terminated_length": 1070.8125, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.014003500875218804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.795081525028026, + "kl": 0.001636505126953125, + "learning_rate": 1.375e-07, + "loss": 0.0125, + "num_tokens": 2546363.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8386225700378418, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009944282634284329, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07769957158427523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1458055529095489, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 56 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1211.0, + "completions/mean_terminated_length": 1211.0, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.014253563390847712, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0112498408490684, + "kl": 0.001708984375, + "learning_rate": 1.4e-07, + "loss": 0.0194, + "num_tokens": 2593907.0, + "reward": 0.0, + "reward_std": 0.71753990650177, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03011310053456635, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05252249254169858, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 57 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1233.25, + "completions/mean_terminated_length": 1171.6923828125, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.014503625906476619, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.494692512980705, + "kl": 0.002658843994140625, + "learning_rate": 1.4249999999999999e-07, + "loss": 0.0357, + "num_tokens": 2643591.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0092942714691162, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011541700312712472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034733372026321964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14395215254459456, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 58 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1104.875, + "completions/mean_terminated_length": 1048.4285888671875, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.014753688422105527, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9838704094003963, + "kl": 0.00188446044921875, + "learning_rate": 1.45e-07, + "loss": 0.001, + "num_tokens": 2698685.0, + "reward": 0.0, + "reward_std": 1.006044864654541, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005957188021272992, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.037001115771667245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0877707451472511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 59 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1138.1875, + "completions/mean_terminated_length": 1114.0667724609375, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.015003750937734433, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5443329865928384, + "kl": 0.00286865234375, + "learning_rate": 1.475e-07, + "loss": -0.0361, + "num_tokens": 2746040.0, + "reward": 0.0, + "reward_std": 0.9534966945648193, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04422983602752913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06068607065169511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 60 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 930.75, + "completions/mean_terminated_length": 930.75, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.015253813453363341, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.196585183198036, + "kl": 0.0022640228271484375, + "learning_rate": 1.5e-07, + "loss": -0.0151, + "num_tokens": 2797916.0, + "reward": 0.0, + "reward_std": 1.031198263168335, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06496199678593485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10965750644106184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 61 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1180.6875, + "completions/mean_terminated_length": 1159.4000244140625, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.015503875968992248, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.117153858618595, + "kl": 0.0020599365234375, + "learning_rate": 1.525e-07, + "loss": -0.0202, + "num_tokens": 2840103.0, + "reward": 0.0, + "reward_std": 0.3633580207824707, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07207462866079158, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2558689650377093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 62 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1084.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 886.875, + "completions/mean_terminated_length": 886.875, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.015753938484621154, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.520554902484488, + "kl": 0.003162384033203125, + "learning_rate": 1.55e-07, + "loss": 0.0118, + "num_tokens": 2877893.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7622057199478149, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034506061927002556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031995256227217585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066928, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 63 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1122.6875, + "completions/mean_terminated_length": 1122.6875, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.016004001000250064, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.980654960388396, + "kl": 0.0007505416870117188, + "learning_rate": 1.575e-07, + "loss": -0.0088, + "num_tokens": 2929424.0, + "reward": 0.0, + "reward_std": 0.904703676700592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0068085183866428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.123614003873515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 64 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 878.0625, + "completions/mean_terminated_length": 878.0625, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.01625406351587897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1701892030211236, + "kl": 0.0016956329345703125, + "learning_rate": 1.6e-07, + "loss": 0.018, + "num_tokens": 2963553.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9831296801567078, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.037242927940112275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05857346231112036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 65 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1037.875, + "completions/mean_terminated_length": 1007.0667114257812, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.016504126031507877, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4913287782150566, + "kl": 0.0017604827880859375, + "learning_rate": 1.625e-07, + "loss": 0.0473, + "num_tokens": 2991863.0, + "reward": 0.0, + "reward_std": 0.8729775547981262, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14430233209311744, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20791955685864577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 66 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 947.8125, + "completions/mean_terminated_length": 947.8125, + "completions/min_length": 442.0, + "completions/min_terminated_length": 442.0, + "epoch": 0.016754188547136784, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4071639868975723, + "kl": 0.00145721435546875, + "learning_rate": 1.65e-07, + "loss": -0.0716, + "num_tokens": 3030188.0, + "reward": 0.0, + "reward_std": 0.9860392212867737, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10142699739926776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08999948106350607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15098442401882486, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 67 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1128.6875, + "completions/mean_terminated_length": 1128.6875, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.01700425106276569, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8659880854123085, + "kl": 0.0018520355224609375, + "learning_rate": 1.675e-07, + "loss": 0.0027, + "num_tokens": 3064087.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9341875910758972, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1420887036236777, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21975794705324614, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 68 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1291.9375, + "completions/mean_terminated_length": 1222.5833740234375, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.0172543135783946, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.566543298801709, + "kl": 0.0012454986572265625, + "learning_rate": 1.7000000000000001e-07, + "loss": -0.019, + "num_tokens": 3107726.0, + "reward": 0.0, + "reward_std": 0.6228938102722168, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14400719159611117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15350336318691166, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 69 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 817.5625, + "completions/mean_terminated_length": 817.5625, + "completions/min_length": 356.0, + "completions/min_terminated_length": 356.0, + "epoch": 0.017504376094023506, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.892153609521041, + "kl": 0.00229644775390625, + "learning_rate": 1.725e-07, + "loss": -0.121, + "num_tokens": 3146815.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7122063636779785, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010933019673626683, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.019135407393491368, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 70 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1085.0, + "completions/mean_terminated_length": 1085.0, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.017754438609652413, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.239720440218199, + "kl": 0.0017375946044921875, + "learning_rate": 1.75e-07, + "loss": -0.039, + "num_tokens": 3186127.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9096763134002686, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09104247282913551, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1002277257764946, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 71 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1385.0625, + "completions/mean_terminated_length": 1237.2857666015625, + "completions/min_length": 1141.0, + "completions/min_terminated_length": 1141.0, + "epoch": 0.01800450112528132, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6606653428662759, + "kl": 0.00086212158203125, + "learning_rate": 1.775e-07, + "loss": 0.0261, + "num_tokens": 3238304.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.045023798942566, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.4611579480561342, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.32444879053177345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 72 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1026.4375, + "completions/mean_terminated_length": 1026.4375, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.01825456364091023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9748801437745294, + "kl": 0.002777099609375, + "learning_rate": 1.8e-07, + "loss": -0.0466, + "num_tokens": 3273095.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0488736629486084, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033293855715928834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08065434744474445, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 73 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1134.1875, + "completions/mean_terminated_length": 1109.800048828125, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.018504626156539136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8865452831955456, + "kl": 0.0016536712646484375, + "learning_rate": 1.825e-07, + "loss": 0.0064, + "num_tokens": 3323498.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.54623943567276, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07334155635430754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051415072420991664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1520233900132184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 74 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1201.4375, + "completions/mean_terminated_length": 1065.727294921875, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.018754688672168042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.637223331625875, + "kl": 0.0019588470458984375, + "learning_rate": 1.85e-07, + "loss": 0.0078, + "num_tokens": 3384537.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.063974142074585, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027594722256993578, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04863588976190303, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 75 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1218.25, + "completions/mean_terminated_length": 1124.3333740234375, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.01900475118779695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2030400539315123, + "kl": 0.00213623046875, + "learning_rate": 1.875e-07, + "loss": 0.0479, + "num_tokens": 3436573.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.030653715133667, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03329600306793299, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10200558679299362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 76 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1083.875, + "completions/mean_terminated_length": 1083.875, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.019254813703425855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1425583257608367, + "kl": 0.0017070770263671875, + "learning_rate": 1.8999999999999998e-07, + "loss": 0.0162, + "num_tokens": 3471859.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0261328220367432, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04720300157537559, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06389161850209658, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 77 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1278.0, + "completions/max_terminated_length": 1278.0, + "completions/mean_length": 1059.8125, + "completions/mean_terminated_length": 1059.8125, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.019504876219054765, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.404716973859101, + "kl": 0.001918792724609375, + "learning_rate": 1.9249999999999998e-07, + "loss": -0.0554, + "num_tokens": 3522552.0, + "reward": 0.0, + "reward_std": 0.9080052375793457, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009634190409626472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10725714864630848, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 78 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1239.6875, + "completions/mean_terminated_length": 1202.5, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.01975493873468367, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4991620683598956, + "kl": 0.0014333724975585938, + "learning_rate": 1.9499999999999999e-07, + "loss": 0.0342, + "num_tokens": 3579715.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0154008865356445, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05513500197405647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20356773248768564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026007, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 79 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1112.5, + "completions/mean_terminated_length": 1086.666748046875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.020005001250312578, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.27139175182866, + "kl": 0.0016345977783203125, + "learning_rate": 1.975e-07, + "loss": 0.0492, + "num_tokens": 3626635.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0008465051651, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.048703968038327175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09575594874594526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452274, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 80 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1140.125, + "completions/mean_terminated_length": 1088.71435546875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.020255063765941484, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.29495361946534, + "kl": 0.0020351409912109375, + "learning_rate": 2e-07, + "loss": -0.0455, + "num_tokens": 3681493.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.966710090637207, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019522166847385225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049035884553431476, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 81 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1091.375, + "completions/mean_terminated_length": 1033.0, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.020505126281570394, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2756823671395825, + "kl": 0.00203704833984375, + "learning_rate": 2.025e-07, + "loss": 0.0097, + "num_tokens": 3722819.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.3079444169998169, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10654174372521455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15685110608549696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14650243330048468, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 82 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1324.3125, + "completions/mean_terminated_length": 1244.45458984375, + "completions/min_length": 1056.0, + "completions/min_terminated_length": 1056.0, + "epoch": 0.0207551887971993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.049373347309233, + "kl": 0.00214385986328125, + "learning_rate": 2.0499999999999997e-07, + "loss": 0.0, + "num_tokens": 3784616.0, + "reward": 0.0, + "reward_std": 0.6444264054298401, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014944570067305023, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03427039428261783, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 83 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1045.3125, + "completions/mean_terminated_length": 1045.3125, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.021005251312828207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.202752750073526, + "kl": 0.0019779205322265625, + "learning_rate": 2.0749999999999997e-07, + "loss": -0.0304, + "num_tokens": 3835453.0, + "reward": 0.0, + "reward_std": 0.8071345686912537, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03739788184825605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06245194733338377, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 84 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1045.9375, + "completions/mean_terminated_length": 1045.9375, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.021255313828457113, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5604132537783215, + "kl": 0.001010894775390625, + "learning_rate": 2.0999999999999997e-07, + "loss": 0.005, + "num_tokens": 3872580.0, + "reward": 0.0, + "reward_std": 0.5239625573158264, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1286152003882351, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15974428385252493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 85 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 881.25, + "completions/mean_terminated_length": 881.25, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.021505376344086023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3772883575014396, + "kl": 0.00196075439453125, + "learning_rate": 2.1249999999999998e-07, + "loss": -0.0017, + "num_tokens": 3907040.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4579150676727295, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0015631823580413034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046595425583326244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 86 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1264.1875, + "completions/mean_terminated_length": 1122.7000732421875, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.02175543885971493, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5943564935581553, + "kl": 0.0006747245788574219, + "learning_rate": 2.1499999999999998e-07, + "loss": -0.0288, + "num_tokens": 3962539.0, + "reward": 0.0, + "reward_std": 1.0243195295333862, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011823715031105344, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06152942220223279, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 87 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1202.0, + "completions/mean_length": 1239.4375, + "completions/mean_terminated_length": 1036.77783203125, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.022005501375343836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.829208358128923, + "kl": 0.00327301025390625, + "learning_rate": 2.1749999999999998e-07, + "loss": 0.0063, + "num_tokens": 4013602.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0469043254852295, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009073411877023183, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0744058848737644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 88 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1092.0, + "completions/max_terminated_length": 1092.0, + "completions/mean_length": 928.875, + "completions/mean_terminated_length": 928.875, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.022255563890972743, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1211322435945448, + "kl": 0.0007197856903076172, + "learning_rate": 2.1999999999999998e-07, + "loss": -0.0076, + "num_tokens": 4039672.0, + "reward": 0.0, + "reward_std": 0.41592514514923096, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03284225617797078, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06338936406521158, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 89 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1142.375, + "completions/mean_terminated_length": 1142.375, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.02250562640660165, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.936106250930488, + "kl": 0.00167083740234375, + "learning_rate": 2.225e-07, + "loss": -0.0529, + "num_tokens": 4082462.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9025722742080688, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02940289328121365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08249062140790385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 90 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1144.3125, + "completions/mean_terminated_length": 1120.60009765625, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.02275568892223056, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.501320506713968, + "kl": 0.0015192031860351562, + "learning_rate": 2.25e-07, + "loss": 0.003, + "num_tokens": 4132499.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5334848165512085, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01608152924204035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1366124910567765, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 91 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 1087.625, + "completions/mean_terminated_length": 1087.625, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.023005751437859465, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.998823193406686, + "kl": 0.00023433566093444824, + "learning_rate": 2.275e-07, + "loss": 0.0023, + "num_tokens": 4169477.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9468536376953125, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05508426891147999, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06664755137557996, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 92 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1206.3125, + "completions/mean_terminated_length": 1138.5384521484375, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.023255813953488372, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.480919841159802, + "kl": 0.0011816024780273438, + "learning_rate": 2.3e-07, + "loss": 0.0044, + "num_tokens": 4211442.0, + "reward": 0.0, + "reward_std": 0.8174987435340881, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0038625151402001574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07345122955578617, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 93 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1207.5625, + "completions/mean_terminated_length": 1140.0770263671875, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.023505876469117278, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5608878167635303, + "kl": 0.002346038818359375, + "learning_rate": 2.325e-07, + "loss": -0.1167, + "num_tokens": 4266619.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9490822553634644, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01649294986343025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04722545212739477, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 94 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 966.75, + "completions/mean_terminated_length": 931.2000732421875, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.023755938984746188, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4553230972640425, + "kl": 0.0007429122924804688, + "learning_rate": 2.3499999999999997e-07, + "loss": -0.0015, + "num_tokens": 4306503.0, + "reward": 0.0, + "reward_std": 0.6062840819358826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05091542537350257, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07718042144728608, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 95 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 882.125, + "completions/mean_terminated_length": 882.125, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.024006001500375095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2222027455757356, + "kl": 0.001514434814453125, + "learning_rate": 2.3749999999999998e-07, + "loss": -0.0405, + "num_tokens": 4338769.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8007175922393799, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028123002946240667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09378406318709273, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 96 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1155.8125, + "completions/mean_terminated_length": 1041.0833740234375, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.024256064016004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1563115792622436, + "kl": 0.002254486083984375, + "learning_rate": 2.4e-07, + "loss": -0.0221, + "num_tokens": 4379158.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0585118532180786, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015410451136229946, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06613896161392802, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 97 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1050.6875, + "completions/mean_terminated_length": 1050.6875, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.024506126531632907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2064965307728057, + "kl": 0.0020236968994140625, + "learning_rate": 2.425e-07, + "loss": -0.013, + "num_tokens": 4419153.0, + "reward": -1.30385160446167e-08, + "reward_std": 1.0652867555618286, + "rewards/wordcountpos_reward_GEOBench/mean": -1.30385160446167e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04555155041311224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09061124528282473, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 98 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1001.4375, + "completions/mean_terminated_length": 968.2000732421875, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.024756189047261814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3546361540936642, + "kl": 0.0015430450439453125, + "learning_rate": 2.45e-07, + "loss": -0.0049, + "num_tokens": 4451864.0, + "reward": 0.0, + "reward_std": 0.6201164722442627, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.254086624135735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23778377344973659, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 99 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1099.4375, + "completions/mean_terminated_length": 1007.0000610351562, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.025006251562890724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3431911956331932, + "kl": 0.00226593017578125, + "learning_rate": 2.475e-07, + "loss": -0.0601, + "num_tokens": 4509239.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.46371403336524963, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014725169620401256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06984167872396013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 963.0, + "completions/mean_length": 737.0, + "completions/mean_terminated_length": 686.1333618164062, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.02525631407851963, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7238271463061015, + "kl": 0.001453399658203125, + "learning_rate": 2.5e-07, + "loss": 0.0778, + "num_tokens": 4543807.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7137898206710815, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21684972425115095, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2936830571685167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1131.0, + "completions/max_terminated_length": 1131.0, + "completions/mean_length": 961.625, + "completions/mean_terminated_length": 961.625, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.025506376594148537, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.982741911753655, + "kl": 0.002532958984375, + "learning_rate": 2.5249999999999996e-07, + "loss": -0.0554, + "num_tokens": 4585745.0, + "reward": 0.0, + "reward_std": 0.9972680807113647, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0568569406579029, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09264553384451292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1125.1875, + "completions/mean_terminated_length": 1000.25, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.025756439109777443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.584646687221917, + "kl": 0.0013103485107421875, + "learning_rate": 2.55e-07, + "loss": -0.0655, + "num_tokens": 4638436.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8415586948394775, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013580486275114224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1057799448864468, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1243.5625, + "completions/mean_terminated_length": 1127.0, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.026006501625406353, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.95301813919548, + "kl": 0.002101898193359375, + "learning_rate": 2.5749999999999997e-07, + "loss": -0.0085, + "num_tokens": 4701797.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0231704711914062, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035364865424973906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04172127113723292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1394.125, + "completions/mean_terminated_length": 1369.6923828125, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.02625656414103526, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8464372388236705, + "kl": 0.0008151531219482422, + "learning_rate": 2.6e-07, + "loss": 0.0024, + "num_tokens": 4737607.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6402871608734131, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021062353745052994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03740126152286246, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1260.875, + "completions/mean_terminated_length": 1117.4000244140625, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.026506626656664166, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.628315438898178, + "kl": 0.002727508544921875, + "learning_rate": 2.625e-07, + "loss": -0.0092, + "num_tokens": 4787349.0, + "reward": 0.0, + "reward_std": 1.0350193977355957, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08220349476406887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10921345029715404, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15299479536052005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1033.375, + "completions/mean_terminated_length": 1033.375, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.026756689172293072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0784171994987446, + "kl": 0.002048492431640625, + "learning_rate": 2.65e-07, + "loss": 0.0242, + "num_tokens": 4839251.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9766092300415039, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.27021831374394184, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2079118975629674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1349.75, + "completions/mean_terminated_length": 1232.888916015625, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.02700675168792198, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6608162905874484, + "kl": 0.001922607421875, + "learning_rate": 2.675e-07, + "loss": -0.0198, + "num_tokens": 4893215.0, + "reward": 0.0, + "reward_std": 0.9312055110931396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019561955243087438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05351356230901217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1020.0, + "completions/max_terminated_length": 1020.0, + "completions/mean_length": 876.0625, + "completions/mean_terminated_length": 876.0625, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.02725681420355089, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7117769063180783, + "kl": 0.0020961761474609375, + "learning_rate": 2.7e-07, + "loss": -0.029, + "num_tokens": 4928032.0, + "reward": -4.842877388000488e-08, + "reward_std": 1.037239909172058, + "rewards/wordcountpos_reward_GEOBench/mean": -4.842877388000488e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01462156309621361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06386453074136239, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1077.0, + "completions/max_terminated_length": 1077.0, + "completions/mean_length": 834.4375, + "completions/mean_terminated_length": 834.4375, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.027506876719179795, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1362633259765778, + "kl": 0.0012445449829101562, + "learning_rate": 2.725e-07, + "loss": 0.0286, + "num_tokens": 4969527.0, + "reward": 0.0, + "reward_std": 0.6638973355293274, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0009303692083795926, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21472417428673446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1144.5, + "completions/mean_terminated_length": 1093.71435546875, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.0277569392348087, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0683811286186953, + "kl": 0.0009098052978515625, + "learning_rate": 2.75e-07, + "loss": 0.0193, + "num_tokens": 5004583.0, + "reward": 0.0, + "reward_std": 0.9538509845733643, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.29096474188817173, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.532968063810306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17716909687891083, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1112.0, + "completions/max_terminated_length": 1112.0, + "completions/mean_length": 907.5625, + "completions/mean_terminated_length": 907.5625, + "completions/min_length": 504.0, + "completions/min_terminated_length": 504.0, + "epoch": 0.028007001750437608, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4887450501290753, + "kl": 0.0011463165283203125, + "learning_rate": 2.775e-07, + "loss": -0.0091, + "num_tokens": 5033240.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9888060688972473, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.037564951238851806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06936106285187789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1023.8125, + "completions/mean_terminated_length": 1023.8125, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.028257064266066518, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.643272195798035, + "kl": 0.0012454986572265625, + "learning_rate": 2.8e-07, + "loss": -0.0441, + "num_tokens": 5071565.0, + "reward": 0.0, + "reward_std": 0.6139549016952515, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0061897652398535214, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19523994138359146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869926, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1311.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 1002.0, + "completions/mean_terminated_length": 1002.0, + "completions/min_length": 737.0, + "completions/min_terminated_length": 737.0, + "epoch": 0.028507126781695424, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6586731765515146, + "kl": 0.0027217864990234375, + "learning_rate": 2.8249999999999994e-07, + "loss": 0.0094, + "num_tokens": 5117157.0, + "reward": 0.0, + "reward_std": 1.0075820684432983, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09530478367868206, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08997096415490217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1053.0, + "completions/max_terminated_length": 1053.0, + "completions/mean_length": 898.5625, + "completions/mean_terminated_length": 898.5625, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.02875718929732433, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2381815955294586, + "kl": 0.0017871856689453125, + "learning_rate": 2.8499999999999997e-07, + "loss": -0.0472, + "num_tokens": 5154534.0, + "reward": 0.0, + "reward_std": 0.6959640383720398, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0021811916016728442, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024612042329920102, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1328.5625, + "completions/mean_terminated_length": 1157.125, + "completions/min_length": 304.0, + "completions/min_terminated_length": 304.0, + "epoch": 0.029007251812953237, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4271066263989742, + "kl": 0.0018911361694335938, + "learning_rate": 2.8749999999999995e-07, + "loss": -0.0486, + "num_tokens": 5209087.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0121264457702637, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01651416722981401, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03522146831011621, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1178.9375, + "completions/mean_terminated_length": 1178.9375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.029257314328582147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0233227367199023, + "kl": 0.001781463623046875, + "learning_rate": 2.9e-07, + "loss": -0.0298, + "num_tokens": 5253230.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9901900887489319, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1383935692093195, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15170305496620878, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1222.0, + "completions/max_terminated_length": 1222.0, + "completions/mean_length": 943.375, + "completions/mean_terminated_length": 943.375, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.029507376844211054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9545561038929464, + "kl": 0.00243377685546875, + "learning_rate": 2.9249999999999995e-07, + "loss": -0.0375, + "num_tokens": 5301572.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0564510822296143, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15301330266396232, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27493141283052847, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 1028.875, + "completions/mean_terminated_length": 1028.875, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.02975743935983996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7025634878245732, + "kl": 0.002437591552734375, + "learning_rate": 2.95e-07, + "loss": -0.0477, + "num_tokens": 5357394.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9683343768119812, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003340710288379313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.013362841153517251, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1127.4375, + "completions/mean_terminated_length": 1127.4375, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.030007501875468866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8450260764380193, + "kl": 0.00278472900390625, + "learning_rate": 2.9749999999999996e-07, + "loss": -0.0649, + "num_tokens": 5405417.0, + "reward": 0.0, + "reward_std": 0.9815717935562134, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034553082121358034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12605322179568523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635778, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 964.0, + "completions/mean_terminated_length": 964.0, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.030257564391097773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2284304283234664, + "kl": 0.0016918182373046875, + "learning_rate": 3e-07, + "loss": 0.0652, + "num_tokens": 5444153.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.825506329536438, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01828380323442647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08283982323964569, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1054.4375, + "completions/mean_terminated_length": 951.6154174804688, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.030507626906726683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6730293780924717, + "kl": 0.00252532958984375, + "learning_rate": 3.0249999999999996e-07, + "loss": 0.0195, + "num_tokens": 5482888.0, + "reward": 0.0, + "reward_std": 0.7187535762786865, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1219.0, + "completions/max_terminated_length": 1219.0, + "completions/mean_length": 922.0, + "completions/mean_terminated_length": 922.0, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.03075768942235559, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5315264013899843, + "kl": 0.0021991729736328125, + "learning_rate": 3.05e-07, + "loss": -0.0257, + "num_tokens": 5524376.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0593985319137573, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009584681247546958, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06429136250294845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.060705726131767744, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1097.5, + "completions/mean_terminated_length": 1097.5, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.031007751937984496, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.848348711238791, + "kl": 0.00165557861328125, + "learning_rate": 3.0749999999999997e-07, + "loss": -0.0088, + "num_tokens": 5560696.0, + "reward": 0.0, + "reward_std": 0.8425840139389038, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0653587560026582, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10639857626279688, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1029.5625, + "completions/mean_terminated_length": 1029.5625, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.031257814453613406, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5179376436799856, + "kl": 0.00232696533203125, + "learning_rate": 3.1e-07, + "loss": -0.0711, + "num_tokens": 5605777.0, + "reward": 0.0, + "reward_std": 0.710605263710022, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030231723454569072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05190526032012107, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 984.75, + "completions/mean_terminated_length": 984.75, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.03150787696924231, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1153450707963315, + "kl": 0.0016031265258789062, + "learning_rate": 3.1249999999999997e-07, + "loss": 0.0089, + "num_tokens": 5642013.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9987987279891968, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03383639659855257, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07969757269939194, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1256.3125, + "completions/mean_terminated_length": 1200.0770263671875, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.03175793948487122, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5686808356362514, + "kl": 0.0013370513916015625, + "learning_rate": 3.15e-07, + "loss": 0.0124, + "num_tokens": 5688714.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7132453918457031, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028529815026896006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08518664138096801, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1184.8125, + "completions/mean_terminated_length": 1184.8125, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.03200800200050013, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.491246498743425, + "kl": 0.001346588134765625, + "learning_rate": 3.175e-07, + "loss": -0.0024, + "num_tokens": 5732095.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8698396682739258, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04168005095697119, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15223562252010023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1218.6875, + "completions/mean_terminated_length": 999.888916015625, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.03225806451612903, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6078198141497837, + "kl": 0.0017566680908203125, + "learning_rate": 3.2e-07, + "loss": -0.0018, + "num_tokens": 5776682.0, + "reward": 0.0, + "reward_std": 0.8985263705253601, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06338732922236485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15718476050110347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 1500.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 1500.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.03250812703175794, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6506275917572666, + "kl": 0.0007429122924804688, + "learning_rate": 3.225e-07, + "loss": 0.0, + "num_tokens": 5843786.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0557719469070435, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024289280525682456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04444416978402356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1212.6875, + "completions/mean_terminated_length": 1040.300048828125, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.032758189547386844, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8362862694025126, + "kl": 0.0017337799072265625, + "learning_rate": 3.25e-07, + "loss": -0.0256, + "num_tokens": 5883085.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.067497968673706, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07493437048782088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11148236876360786, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1082.4375, + "completions/mean_terminated_length": 1082.4375, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.033008252063015754, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.453558492524879, + "kl": 0.00255584716796875, + "learning_rate": 3.275e-07, + "loss": -0.0599, + "num_tokens": 5919276.0, + "reward": 0.0, + "reward_std": 0.7440958023071289, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10436876588470634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17153495753287243, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1230.1875, + "completions/mean_terminated_length": 1230.1875, + "completions/min_length": 1103.0, + "completions/min_terminated_length": 1103.0, + "epoch": 0.033258314578644664, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1523228885058403, + "kl": 0.0010352134704589844, + "learning_rate": 3.3e-07, + "loss": 0.0088, + "num_tokens": 5971279.0, + "reward": 0.0, + "reward_std": 0.9889296293258667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010586621215658058, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08893943837420341, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1105.0, + "completions/max_terminated_length": 1105.0, + "completions/mean_length": 796.875, + "completions/mean_terminated_length": 796.875, + "completions/min_length": 340.0, + "completions/min_terminated_length": 340.0, + "epoch": 0.03350837709427357, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.377330689531196, + "kl": 0.0009851455688476562, + "learning_rate": 3.325e-07, + "loss": -0.0323, + "num_tokens": 5999101.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6408165693283081, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004567143661635577, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08910716688870268, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1130.4375, + "completions/mean_terminated_length": 1045.1539306640625, + "completions/min_length": 476.0, + "completions/min_terminated_length": 476.0, + "epoch": 0.03375843960990248, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2069301836286632, + "kl": 0.002410888671875, + "learning_rate": 3.35e-07, + "loss": -0.0531, + "num_tokens": 6057244.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8819225430488586, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041100670876597946, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08432650041587254, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1202.0, + "completions/max_terminated_length": 1202.0, + "completions/mean_length": 939.625, + "completions/mean_terminated_length": 939.625, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.03400850212553138, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6787933887533844, + "kl": 0.0013608932495117188, + "learning_rate": 3.375e-07, + "loss": -0.0354, + "num_tokens": 6091014.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.42292171716690063, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007861912763444655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08080501386121948, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818417, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1205.3125, + "completions/mean_terminated_length": 1185.666748046875, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.03425856464116029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6442427205568935, + "kl": 0.001277923583984375, + "learning_rate": 3.4000000000000003e-07, + "loss": -0.0266, + "num_tokens": 6133491.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5531470775604248, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18354519224061897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14740854365565892, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1344.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1002.625, + "completions/mean_terminated_length": 1002.625, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.0345086271567892, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4399054558162234, + "kl": 0.0021190643310546875, + "learning_rate": 3.425e-07, + "loss": 0.0064, + "num_tokens": 6174901.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5891343355178833, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021474344125256336, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10287668980752833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17805533888009004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 981.25, + "completions/mean_terminated_length": 981.25, + "completions/min_length": 737.0, + "completions/min_terminated_length": 737.0, + "epoch": 0.0347586896724181, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0143874373080317, + "kl": 0.001514434814453125, + "learning_rate": 3.45e-07, + "loss": -0.0109, + "num_tokens": 6227785.0, + "reward": 0.0, + "reward_std": 1.024099349975586, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0009414156219031871, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059882871676529746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1255.125, + "completions/mean_terminated_length": 1198.615478515625, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.03500875218804701, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7146160425600523, + "kl": 0.0018310546875, + "learning_rate": 3.4749999999999996e-07, + "loss": -0.0287, + "num_tokens": 6283827.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9553406238555908, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2554583935477641, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1763033094581359, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1237.0, + "completions/max_terminated_length": 1237.0, + "completions/mean_length": 951.4375, + "completions/mean_terminated_length": 951.4375, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.035258814703675916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2385667094398203, + "kl": 0.0018634796142578125, + "learning_rate": 3.5e-07, + "loss": 0.0009, + "num_tokens": 6330018.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7626720666885376, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01860800740153537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09975287854938328, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1289.75, + "completions/mean_terminated_length": 1079.5, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.035508877219304825, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.699642091433762, + "kl": 0.0017414093017578125, + "learning_rate": 3.5249999999999996e-07, + "loss": 0.0162, + "num_tokens": 6391566.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.017782211303711, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011559478435247232, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03199765141274106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1149.5, + "completions/mean_terminated_length": 1149.5, + "completions/min_length": 366.0, + "completions/min_terminated_length": 366.0, + "epoch": 0.035758939734933735, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8700549804190856, + "kl": 0.0015287399291992188, + "learning_rate": 3.55e-07, + "loss": -0.108, + "num_tokens": 6442878.0, + "reward": 0.0, + "reward_std": 0.5676659345626831, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10171241832366507, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07283005904806879, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16307235385739852, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1167.0, + "completions/max_terminated_length": 1167.0, + "completions/mean_length": 1013.5, + "completions/mean_terminated_length": 1013.5, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.03600900225056264, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.667021680945365, + "kl": 0.00112152099609375, + "learning_rate": 3.5749999999999997e-07, + "loss": -0.01, + "num_tokens": 6490870.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8150209188461304, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19112912749411132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1878380751381778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1064.0, + "completions/mean_length": 1192.3125, + "completions/mean_terminated_length": 884.625, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.03625906476619155, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.481164057727238, + "kl": 0.002979278564453125, + "learning_rate": 3.6e-07, + "loss": -0.029, + "num_tokens": 6540891.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.979060173034668, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047785787582778357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05882718192334414, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 983.8125, + "completions/mean_terminated_length": 983.8125, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.03650912728182046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3540838363100605, + "kl": 0.001163482666015625, + "learning_rate": 3.6249999999999997e-07, + "loss": -0.0767, + "num_tokens": 6581000.0, + "reward": 0.0, + "reward_std": 1.0295701026916504, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16440164213579286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.147082576495672, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1118.9375, + "completions/mean_terminated_length": 1093.533447265625, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.03675918979744936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.660577016190452, + "kl": 0.003009796142578125, + "learning_rate": 3.65e-07, + "loss": -0.0449, + "num_tokens": 6621911.0, + "reward": 0.0, + "reward_std": 0.8658617734909058, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04321878857425389, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043221171659168486, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15817243286527055, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1494.6875, + "completions/mean_terminated_length": 1415.0, + "completions/min_length": 1415.0, + "completions/min_terminated_length": 1415.0, + "epoch": 0.03700925231307827, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3534214372274425, + "kl": 0.00057220458984375, + "learning_rate": 3.675e-07, + "loss": -0.0008, + "num_tokens": 6663722.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9075744152069092, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045519105922417064, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07534658085856863, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1120.9375, + "completions/mean_terminated_length": 1066.7857666015625, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.037259314828707174, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2315642878811106, + "kl": 0.001941680908203125, + "learning_rate": 3.7e-07, + "loss": -0.0162, + "num_tokens": 6704649.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9046562910079956, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02095425287977022, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14669741824374657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1049.0, + "completions/max_terminated_length": 1049.0, + "completions/mean_length": 898.125, + "completions/mean_terminated_length": 898.125, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.037509377344336084, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.6684986554989318, + "kl": 0.00011566281318664551, + "learning_rate": 3.725e-07, + "loss": -0.015, + "num_tokens": 6744035.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0532549619674683, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.055344626102484105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08960762140543238, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1309.5, + "completions/mean_terminated_length": 1195.2000732421875, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.037759439859964994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0177347107988886, + "kl": 0.0011568069458007812, + "learning_rate": 3.75e-07, + "loss": -0.0328, + "num_tokens": 6795075.0, + "reward": 0.0, + "reward_std": 1.057640552520752, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031050191461906816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0651473872373188, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1227.875, + "completions/mean_terminated_length": 1165.0770263671875, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.0380095023755939, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.37000706459023, + "kl": 0.002513885498046875, + "learning_rate": 3.775e-07, + "loss": -0.0442, + "num_tokens": 6845225.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9845694303512573, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07962982704647113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06953715069935648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1156.5625, + "completions/mean_terminated_length": 1156.5625, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.03825956489122281, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5340761811332686, + "kl": 0.002468109130859375, + "learning_rate": 3.7999999999999996e-07, + "loss": -0.0099, + "num_tokens": 6892114.0, + "reward": 0.0, + "reward_std": 0.78373783826828, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03754427950171062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13386443477030455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05000000000000004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1013.0, + "completions/max_terminated_length": 1013.0, + "completions/mean_length": 786.0625, + "completions/mean_terminated_length": 786.0625, + "completions/min_length": 402.0, + "completions/min_terminated_length": 402.0, + "epoch": 0.03850962740685171, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1889643134576975, + "kl": 0.0008420944213867188, + "learning_rate": 3.825e-07, + "loss": -0.0493, + "num_tokens": 6923563.0, + "reward": 0.0, + "reward_std": 0.9449535608291626, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005199281936684328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05564313148656546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 1034.25, + "completions/mean_terminated_length": 1003.2000732421875, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.03875968992248062, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.450638935831109, + "kl": 0.0023651123046875, + "learning_rate": 3.8499999999999997e-07, + "loss": 0.0274, + "num_tokens": 6951775.0, + "reward": 0.0, + "reward_std": 0.9391560554504395, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1238598319863966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18205195044864445, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1161.0625, + "completions/mean_terminated_length": 1138.4666748046875, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.03900975243810953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.482743180255012, + "kl": 0.002513885498046875, + "learning_rate": 3.875e-07, + "loss": 0.0119, + "num_tokens": 6987776.0, + "reward": 0.0, + "reward_std": 0.43920376896858215, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12819776512444084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08593575009678209, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1241.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1044.625, + "completions/mean_terminated_length": 1044.625, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.03925981495373843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6642801604558843, + "kl": 0.002227783203125, + "learning_rate": 3.8999999999999997e-07, + "loss": -0.0268, + "num_tokens": 7031394.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9692599177360535, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06303064323372125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09409862624641008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1220.625, + "completions/mean_terminated_length": 1202.0001220703125, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.03950987746936734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1988938830247537, + "kl": 0.002399444580078125, + "learning_rate": 3.925e-07, + "loss": 0.0033, + "num_tokens": 7084028.0, + "reward": 0.0, + "reward_std": 0.6725486516952515, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011476113007200427, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16594073088643463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1318.8125, + "completions/mean_terminated_length": 1258.416748046875, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.03975993998499625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.440519336681081, + "kl": 0.002613067626953125, + "learning_rate": 3.95e-07, + "loss": -0.0482, + "num_tokens": 7129825.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0367779731750488, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035018190344437074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07498615194286763, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1141.3125, + "completions/mean_terminated_length": 1090.071533203125, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.040010002500625155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5683796309460103, + "kl": 0.0012617111206054688, + "learning_rate": 3.975e-07, + "loss": -0.0542, + "num_tokens": 7175198.0, + "reward": 0.0, + "reward_std": 0.991832971572876, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01725903969048362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09982833797636895, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1098.625, + "completions/mean_terminated_length": 1041.2857666015625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.040260065016254065, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.769653221268777, + "kl": 0.004611968994140625, + "learning_rate": 4e-07, + "loss": 0.031, + "num_tokens": 7225096.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9515671730041504, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08404377174265541, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07353678445683015, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408154, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1008.75, + "completions/mean_terminated_length": 845.0, + "completions/min_length": 458.0, + "completions/min_terminated_length": 458.0, + "epoch": 0.04051012753188297, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.927091083451165, + "kl": 0.0024871826171875, + "learning_rate": 4.025e-07, + "loss": 0.0176, + "num_tokens": 7254796.0, + "reward": 0.0, + "reward_std": 0.7449043393135071, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.099, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10224676033987581, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5833333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13877773329774218, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1034.0, + "completions/max_terminated_length": 1034.0, + "completions/mean_length": 809.75, + "completions/mean_terminated_length": 809.75, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.04076019004751188, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5802328228268694, + "kl": 0.001789093017578125, + "learning_rate": 4.05e-07, + "loss": -0.0051, + "num_tokens": 7290296.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.038016438484192, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10298199984840055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11850711900147308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1158.1875, + "completions/mean_terminated_length": 1158.1875, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.04101025256314079, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1441787255405815, + "kl": 0.0018138885498046875, + "learning_rate": 4.0749999999999996e-07, + "loss": -0.0015, + "num_tokens": 7329355.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8182548880577087, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12696235971679484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12660106299308777, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 975.1875, + "completions/mean_terminated_length": 975.1875, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.04126031507876969, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2160048375332906, + "kl": 0.001972198486328125, + "learning_rate": 4.0999999999999994e-07, + "loss": -0.0514, + "num_tokens": 7367574.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.8688175678253174, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03463976561187214, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04970411750888591, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 1039.5625, + "completions/mean_terminated_length": 1039.5625, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.0415103775943986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4522321049640814, + "kl": 0.001796722412109375, + "learning_rate": 4.1249999999999997e-07, + "loss": -0.0087, + "num_tokens": 7414639.0, + "reward": -3.725290298461914e-09, + "reward_std": 0.9890655279159546, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012528437131161826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02807175871225778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1155.5, + "completions/mean_terminated_length": 1106.2857666015625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.041760440110027504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.188695031393865, + "kl": 0.002227783203125, + "learning_rate": 4.1499999999999994e-07, + "loss": -0.0258, + "num_tokens": 7462063.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0615979433059692, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05799799766133702, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06872716636106774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457552, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1051.5, + "completions/mean_terminated_length": 948.0000610351562, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.042010502625656414, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4040509672717634, + "kl": 0.0013103485107421875, + "learning_rate": 4.1749999999999997e-07, + "loss": -0.1013, + "num_tokens": 7502031.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0628480911254883, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07497988939252766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12726410748550823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6499999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1187.0625, + "completions/mean_terminated_length": 1187.0625, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.042260565141285324, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.607969621003654, + "kl": 0.002696990966796875, + "learning_rate": 4.1999999999999995e-07, + "loss": 0.0335, + "num_tokens": 7554736.0, + "reward": 0.0, + "reward_std": 0.6876246333122253, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011872482934303966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10758283358289526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1210.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 854.5625, + "completions/mean_terminated_length": 854.5625, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.04251062765691423, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.335296933507707, + "kl": 0.003124237060546875, + "learning_rate": 4.225e-07, + "loss": -0.0151, + "num_tokens": 7592649.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0604522228240967, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031663010357784606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05092961171144992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1151.25, + "completions/mean_terminated_length": 1128.0, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.04276069017254314, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8561325986403534, + "kl": 0.0018215179443359375, + "learning_rate": 4.2499999999999995e-07, + "loss": -0.081, + "num_tokens": 7635501.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0415858030319214, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025595512858817587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08319192796422085, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16459827639617797, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 960.5, + "completions/mean_terminated_length": 960.5, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.043010752688172046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.72016379404415, + "kl": 0.0011653900146484375, + "learning_rate": 4.275e-07, + "loss": 0.0131, + "num_tokens": 7675205.0, + "reward": 0.0, + "reward_std": 0.9642163515090942, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07559715656872291, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10358227598897685, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857662, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1217.0, + "completions/max_terminated_length": 1217.0, + "completions/mean_length": 979.4375, + "completions/mean_terminated_length": 979.4375, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.04326081520380095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.908338744001443, + "kl": 0.002643585205078125, + "learning_rate": 4.2999999999999996e-07, + "loss": -0.0333, + "num_tokens": 7718756.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0276358127593994, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0156288943898526, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08203131180840093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14851112939963643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 978.0, + "completions/mean_terminated_length": 978.0, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.04351087771942986, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.879672024437743, + "kl": 0.0014705657958984375, + "learning_rate": 4.325e-07, + "loss": 0.0099, + "num_tokens": 7763364.0, + "reward": 0.0, + "reward_std": 0.5932365655899048, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13891962159269494, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2279594173818671, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045224, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 868.125, + "completions/mean_terminated_length": 868.125, + "completions/min_length": 561.0, + "completions/min_terminated_length": 561.0, + "epoch": 0.04376094023505876, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7754955377140815, + "kl": 0.0010004043579101562, + "learning_rate": 4.3499999999999996e-07, + "loss": -0.0049, + "num_tokens": 7804694.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0509774684906006, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09869560263386261, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13717345706657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1265.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 1051.9375, + "completions/mean_terminated_length": 1051.9375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.04401100275068767, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7126879816087746, + "kl": 0.001110076904296875, + "learning_rate": 4.375e-07, + "loss": -0.0586, + "num_tokens": 7855477.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9663581848144531, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11415981126850343, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08263741120958111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1082.1875, + "completions/mean_terminated_length": 1082.1875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.04426106526631658, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3152465138422786, + "kl": 0.000904083251953125, + "learning_rate": 4.3999999999999997e-07, + "loss": 0.0002, + "num_tokens": 7905344.0, + "reward": 0.0, + "reward_std": 0.8798946142196655, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06926494458593051, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08525296941573593, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1233.9375, + "completions/mean_terminated_length": 1074.300048828125, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.044511127781945485, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7811948206988553, + "kl": 0.0007505416870117188, + "learning_rate": 4.425e-07, + "loss": -0.0044, + "num_tokens": 7955823.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9717627763748169, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20781657533912584, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2027738278878854, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1270.875, + "completions/mean_terminated_length": 1133.4000244140625, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.044761190297574395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0166375402920087, + "kl": 0.002269744873046875, + "learning_rate": 4.45e-07, + "loss": -0.0239, + "num_tokens": 8008573.0, + "reward": 0.0, + "reward_std": 0.8425703048706055, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019215214197218236, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02882125899980341, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639732, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1147.4375, + "completions/mean_terminated_length": 1123.933349609375, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.0450112528132033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6615120501746206, + "kl": 0.002704620361328125, + "learning_rate": 4.475e-07, + "loss": -0.0103, + "num_tokens": 8042164.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8956788778305054, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0011604051933715941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04205170296170078, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1138.375, + "completions/mean_terminated_length": 1114.2667236328125, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.04526131532883221, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5062711837246527, + "kl": 0.002658843994140625, + "learning_rate": 4.5e-07, + "loss": 0.0159, + "num_tokens": 8086978.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0642964839935303, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004533780416108619, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057935748636117126, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1168.5625, + "completions/mean_terminated_length": 1168.5625, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.04551137784446112, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.307397413508893, + "kl": 0.002338409423828125, + "learning_rate": 4.525e-07, + "loss": 0.0182, + "num_tokens": 8130243.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0418853759765625, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025223698389086818, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05719200983915906, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1061.1875, + "completions/mean_terminated_length": 998.5000610351562, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.04576144036009002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8936148129246053, + "kl": 0.002658843994140625, + "learning_rate": 4.55e-07, + "loss": 0.012, + "num_tokens": 8182070.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.880881130695343, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10699393149014716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10531607770493387, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1139.0, + "completions/max_terminated_length": 1139.0, + "completions/mean_length": 933.5, + "completions/mean_terminated_length": 933.5, + "completions/min_length": 632.0, + "completions/min_terminated_length": 632.0, + "epoch": 0.04601150287571893, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9540472053827072, + "kl": 0.0007715225219726562, + "learning_rate": 4.575e-07, + "loss": -0.024, + "num_tokens": 8217590.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9921973943710327, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003607126595316574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07836736698036077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1311.1875, + "completions/mean_terminated_length": 1284.21435546875, + "completions/min_length": 1124.0, + "completions/min_terminated_length": 1124.0, + "epoch": 0.046261565391347834, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8398665214032914, + "kl": 0.0017910003662109375, + "learning_rate": 4.6e-07, + "loss": 0.0092, + "num_tokens": 8270361.0, + "reward": 0.0, + "reward_std": 0.5025224089622498, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07249956098368995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07490988099106859, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1153.0, + "completions/max_terminated_length": 1153.0, + "completions/mean_length": 828.125, + "completions/mean_terminated_length": 828.125, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.046511627906976744, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8874538105045096, + "kl": 0.0012936592102050781, + "learning_rate": 4.625e-07, + "loss": -0.005, + "num_tokens": 8312123.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.021283507347107, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006926855667889505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09320177259311038, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1187.0, + "completions/max_terminated_length": 1187.0, + "completions/mean_length": 902.75, + "completions/mean_terminated_length": 902.75, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.04676169042260565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.335713932524795, + "kl": 0.0017910003662109375, + "learning_rate": 4.65e-07, + "loss": -0.0435, + "num_tokens": 8353719.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9269832968711853, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06367610266827373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08713497908672396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 945.0, + "completions/mean_terminated_length": 865.7142944335938, + "completions/min_length": 404.0, + "completions/min_terminated_length": 404.0, + "epoch": 0.047011752938234556, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.129705611734461, + "kl": 0.0024852752685546875, + "learning_rate": 4.675e-07, + "loss": -0.0988, + "num_tokens": 8383071.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8006600141525269, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1458132265641801, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07811387329522743, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1362.375, + "completions/mean_terminated_length": 1133.0, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.047261815453863466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.962946314245481, + "kl": 0.0020294189453125, + "learning_rate": 4.6999999999999995e-07, + "loss": 0.0164, + "num_tokens": 8437973.0, + "reward": 0.0, + "reward_std": 0.6167808771133423, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05990802499186012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06100591036192288, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1141.8125, + "completions/mean_terminated_length": 1090.6429443359375, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.047511877969492376, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.668659849515469, + "kl": 0.002353668212890625, + "learning_rate": 4.725e-07, + "loss": -0.0417, + "num_tokens": 8480650.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.886894702911377, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10910225101634531, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04090552894332728, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 744.125, + "completions/mean_terminated_length": 744.125, + "completions/min_length": 495.0, + "completions/min_terminated_length": 495.0, + "epoch": 0.04776194048512128, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5458994632506626, + "kl": 0.0016227364540100098, + "learning_rate": 4.7499999999999995e-07, + "loss": -0.0555, + "num_tokens": 8529740.0, + "reward": 0.0, + "reward_std": 0.9611965417861938, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017597180392529825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05906163438198417, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1813529401164726, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1043.625, + "completions/mean_terminated_length": 1043.625, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.04801200300075019, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4508786340082853, + "kl": 0.00266265869140625, + "learning_rate": 4.775e-07, + "loss": -0.0039, + "num_tokens": 8571654.0, + "reward": 0.0, + "reward_std": 0.9401953816413879, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003686777260942751, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047118529360944, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 948.8125, + "completions/mean_terminated_length": 948.8125, + "completions/min_length": 725.0, + "completions/min_terminated_length": 725.0, + "epoch": 0.04826206551637909, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0165040497484648, + "kl": 0.0011472702026367188, + "learning_rate": 4.8e-07, + "loss": -0.0643, + "num_tokens": 8613187.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6532577872276306, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0819679434984299, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13615105467506547, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1227.3125, + "completions/mean_terminated_length": 1188.357177734375, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.048512128032008, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0363120348896726, + "kl": 0.002002716064453125, + "learning_rate": 4.824999999999999e-07, + "loss": 0.0026, + "num_tokens": 8650864.0, + "reward": 0.0, + "reward_std": 1.0577753782272339, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022385785610326045, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12548072759684015, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 950.9375, + "completions/mean_terminated_length": 950.9375, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.04876219054763691, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.228040886087097, + "kl": 0.002811431884765625, + "learning_rate": 4.85e-07, + "loss": -0.0262, + "num_tokens": 8695095.0, + "reward": 0.0, + "reward_std": 0.9827460646629333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08519193256327934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10752688908162404, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1012.9375, + "completions/mean_terminated_length": 1012.9375, + "completions/min_length": 582.0, + "completions/min_terminated_length": 582.0, + "epoch": 0.049012253063265815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.880816450325668, + "kl": 0.002227783203125, + "learning_rate": 4.875e-07, + "loss": -0.0558, + "num_tokens": 8745406.0, + "reward": 0.0, + "reward_std": 0.6283947229385376, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019409949460881287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10115915688763275, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1122.625, + "completions/mean_terminated_length": 1122.625, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.049262315578894725, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.653520509757367, + "kl": 0.001300811767578125, + "learning_rate": 4.9e-07, + "loss": -0.0, + "num_tokens": 8791776.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.943457841873169, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07789501420138707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11337845462272575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1370.75, + "completions/mean_terminated_length": 1155.3333740234375, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.04951237809452363, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.870916447128159, + "kl": 0.0017604827880859375, + "learning_rate": 4.924999999999999e-07, + "loss": 0.0353, + "num_tokens": 8853916.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9654046297073364, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06615532553149571, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1609613872111486, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1309.25, + "completions/mean_terminated_length": 1160.888916015625, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.04976244061015254, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7886746574280674, + "kl": 0.00087738037109375, + "learning_rate": 4.95e-07, + "loss": -0.0307, + "num_tokens": 8910520.0, + "reward": 0.0, + "reward_std": 0.8775521516799927, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044777326402216766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07745793474979322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.050000000000000024, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1014.0625, + "completions/mean_terminated_length": 1014.0625, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.05001250312578145, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.139546930243328, + "kl": 0.002994537353515625, + "learning_rate": 4.975e-07, + "loss": -0.0476, + "num_tokens": 8952721.0, + "reward": 0.0, + "reward_std": 0.9957164525985718, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03288454810768757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11009693033685575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1016.625, + "completions/mean_terminated_length": 1016.625, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.05026256564141035, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9932740304909065, + "kl": 0.00038909912109375, + "learning_rate": 5e-07, + "loss": -0.0093, + "num_tokens": 8987739.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.05843186378479, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02071547760344054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06320200045370344, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.045338235029118136, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1435.6875, + "completions/mean_terminated_length": 1406.45458984375, + "completions/min_length": 1179.0, + "completions/min_terminated_length": 1179.0, + "epoch": 0.05051262815703926, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2253912369031634, + "kl": 0.0015163421630859375, + "learning_rate": 5.025e-07, + "loss": -0.0035, + "num_tokens": 9037622.0, + "reward": 0.0, + "reward_std": 0.47661975026130676, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0032011326224538547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14998338402994216, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1287.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1101.3125, + "completions/mean_terminated_length": 1101.3125, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.05076269067266817, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.547474246627761, + "kl": 0.00141143798828125, + "learning_rate": 5.049999999999999e-07, + "loss": 0.0102, + "num_tokens": 9071099.0, + "reward": 0.0, + "reward_std": 0.6025466322898865, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23142197210377816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22738858384821853, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.21495046802392337, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1016.875, + "completions/mean_terminated_length": 1016.875, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.05101275318829707, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3532892006405794, + "kl": 0.0019588470458984375, + "learning_rate": 5.074999999999999e-07, + "loss": -0.0456, + "num_tokens": 9123609.0, + "reward": 0.0, + "reward_std": 0.7658979296684265, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021796104839730206, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0958342857570283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05561108336107645, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1000.8125, + "completions/mean_terminated_length": 967.5333862304688, + "completions/min_length": 498.0, + "completions/min_terminated_length": 498.0, + "epoch": 0.05126281570392598, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4201893695500316, + "kl": 0.001346588134765625, + "learning_rate": 5.1e-07, + "loss": -0.0718, + "num_tokens": 9166854.0, + "reward": 0.0, + "reward_std": 0.9356101751327515, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08677502545262011, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10505062380282229, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0749073501808141, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 949.125, + "completions/mean_terminated_length": 949.125, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.051512878219554886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0597184121731136, + "kl": 0.00194549560546875, + "learning_rate": 5.125e-07, + "loss": -0.0417, + "num_tokens": 9204824.0, + "reward": 0.0, + "reward_std": 0.8644317388534546, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04658640544903133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1194898566758173, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1128.0, + "completions/max_terminated_length": 1128.0, + "completions/mean_length": 968.4375, + "completions/mean_terminated_length": 968.4375, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.051762940735183796, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709029510602295, + "kl": 0.0013599395751953125, + "learning_rate": 5.149999999999999e-07, + "loss": 0.0248, + "num_tokens": 9236759.0, + "reward": 0.0, + "reward_std": 0.9687336683273315, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0144373673403889, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05788982793959248, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1045.0, + "completions/mean_length": 1243.0, + "completions/mean_terminated_length": 1043.111083984375, + "completions/min_length": 1040.0, + "completions/min_terminated_length": 1040.0, + "epoch": 0.052013003250812706, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7548190329287205, + "kl": 0.001178741455078125, + "learning_rate": 5.174999999999999e-07, + "loss": -0.011, + "num_tokens": 9288583.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.47366926074028015, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09969508350516527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12756721265969515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1172.0, + "completions/mean_terminated_length": 1150.1334228515625, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.05226306576644161, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.464442321623374, + "kl": 0.002532958984375, + "learning_rate": 5.2e-07, + "loss": 0.0263, + "num_tokens": 9337127.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0057424306869507, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.036258145370141825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1071944171874336, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14801151106386087, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1013.1875, + "completions/mean_terminated_length": 980.7333984375, + "completions/min_length": 522.0, + "completions/min_terminated_length": 522.0, + "epoch": 0.05251312828207052, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5728103424559, + "kl": 0.0026397705078125, + "learning_rate": 5.225e-07, + "loss": 0.0188, + "num_tokens": 9374354.0, + "reward": 0.0, + "reward_std": 0.2992742359638214, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.5282678773942766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.47867995632561944, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1298.25, + "completions/mean_terminated_length": 1141.3333740234375, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.05276319079769942, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6432464920194323, + "kl": 0.0005650520324707031, + "learning_rate": 5.25e-07, + "loss": 0.0158, + "num_tokens": 9418614.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.001716136932373, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03263791586638516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11455208371243761, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1154.25, + "completions/mean_terminated_length": 1131.2000732421875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.05301325331332833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0680960141266005, + "kl": 0.002307891845703125, + "learning_rate": 5.274999999999999e-07, + "loss": 0.001, + "num_tokens": 9466946.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.006779670715332, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09810847697087262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14938990302469407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1252405093617284, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 965.875, + "completions/mean_terminated_length": 965.875, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.05326331582895724, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8714924789530134, + "kl": 0.0004398822784423828, + "learning_rate": 5.3e-07, + "loss": 0.0319, + "num_tokens": 9497848.0, + "reward": 0.0, + "reward_std": 0.7678999900817871, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10179968970700763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09095536068417731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 844.625, + "completions/mean_terminated_length": 844.625, + "completions/min_length": 570.0, + "completions/min_terminated_length": 570.0, + "epoch": 0.053513378344586145, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.552143302829822, + "kl": 0.0019893646240234375, + "learning_rate": 5.325e-07, + "loss": 0.0677, + "num_tokens": 9537066.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.025545597076416, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24112342326597563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14988800389270554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1214.8125, + "completions/mean_terminated_length": 1214.8125, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.053763440860215055, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4964481992310918, + "kl": 0.000400543212890625, + "learning_rate": 5.35e-07, + "loss": 0.0026, + "num_tokens": 9577775.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8674366474151611, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03735562811713531, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06155428338416654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1244.625, + "completions/mean_terminated_length": 1091.4000244140625, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.05401350337584396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.024353681986083, + "kl": 0.002590179443359375, + "learning_rate": 5.374999999999999e-07, + "loss": 0.0069, + "num_tokens": 9636201.0, + "reward": 0.0, + "reward_std": 0.8815996646881104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.28028724802885024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3109122881292298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 775.75, + "completions/mean_terminated_length": 775.75, + "completions/min_length": 554.0, + "completions/min_terminated_length": 554.0, + "epoch": 0.05426356589147287, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.509935420996739, + "kl": 0.002285003662109375, + "learning_rate": 5.4e-07, + "loss": 0.0198, + "num_tokens": 9662965.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0182379484176636, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06406818796389768, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057622151569729885, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1231.75, + "completions/mean_terminated_length": 963.5, + "completions/min_length": 507.0, + "completions/min_terminated_length": 507.0, + "epoch": 0.05451362840710178, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4684949838034593, + "kl": 0.001468658447265625, + "learning_rate": 5.425e-07, + "loss": -0.0048, + "num_tokens": 9707385.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0508604049682617, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02398684202289427, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12511773758970687, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 976.125, + "completions/mean_terminated_length": 976.125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.05476369092273068, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7721755702267963, + "kl": 0.0017414093017578125, + "learning_rate": 5.45e-07, + "loss": 0.0044, + "num_tokens": 9743483.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.001987338066101, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03952932321050264, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1392509225368291, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 912.0625, + "completions/mean_terminated_length": 912.0625, + "completions/min_length": 526.0, + "completions/min_terminated_length": 526.0, + "epoch": 0.05501375343835959, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8549595268152794, + "kl": 0.002166748046875, + "learning_rate": 5.474999999999999e-07, + "loss": -0.0214, + "num_tokens": 9783052.0, + "reward": 0.0, + "reward_std": 0.9131899476051331, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09945392705785279, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10797928645493704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1091.0, + "completions/max_terminated_length": 1091.0, + "completions/mean_length": 919.875, + "completions/mean_terminated_length": 919.875, + "completions/min_length": 638.0, + "completions/min_terminated_length": 638.0, + "epoch": 0.0552638159539885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.766473008014952, + "kl": 0.002391815185546875, + "learning_rate": 5.5e-07, + "loss": -0.0294, + "num_tokens": 9833266.0, + "reward": 0.0, + "reward_std": 1.0044991970062256, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20906607836034224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12521082487781496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1205.875, + "completions/mean_terminated_length": 1205.875, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.0555138784696174, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9410178030930334, + "kl": 0.00176239013671875, + "learning_rate": 5.525e-07, + "loss": -0.0289, + "num_tokens": 9872704.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7164870500564575, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026684510511889227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07273723529299406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 1246.5625, + "completions/mean_terminated_length": 920.71435546875, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.05576394098524631, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.772953541299658, + "kl": 0.00267791748046875, + "learning_rate": 5.55e-07, + "loss": 0.0041, + "num_tokens": 9934521.0, + "reward": 0.0, + "reward_std": 0.9966045618057251, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028516981361758467, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08873869452797978, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1236.0, + "completions/max_terminated_length": 1236.0, + "completions/mean_length": 841.25, + "completions/mean_terminated_length": 841.25, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.056014003500875216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.973757372977918, + "kl": 0.002391815185546875, + "learning_rate": 5.575e-07, + "loss": -0.1168, + "num_tokens": 9962781.0, + "reward": 0.0, + "reward_std": 0.9101412296295166, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027303925055334362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039342039600991835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1231.0, + "completions/mean_terminated_length": 1168.923095703125, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.056264066016504126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2252896407429037, + "kl": 0.0021877288818359375, + "learning_rate": 5.6e-07, + "loss": -0.0587, + "num_tokens": 10015861.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0415847301483154, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10647240632097099, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10882436387805008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1182.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 1047.3125, + "completions/mean_terminated_length": 1047.3125, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.056514128532133036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.933679126646427, + "kl": 0.0015716552734375, + "learning_rate": 5.625e-07, + "loss": -0.0002, + "num_tokens": 10054354.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6339588761329651, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0461640383429503, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10724005072538961, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1089.4375, + "completions/mean_terminated_length": 1089.4375, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.05676419104776194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6712494534067677, + "kl": 0.002655029296875, + "learning_rate": 5.649999999999999e-07, + "loss": 0.0283, + "num_tokens": 10098649.0, + "reward": 0.0, + "reward_std": 0.8771959543228149, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24991943852135445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.32252441670121146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1027.0625, + "completions/mean_terminated_length": 1027.0625, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.05701425356339085, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4161185994572087, + "kl": 0.0022220611572265625, + "learning_rate": 5.675e-07, + "loss": -0.0379, + "num_tokens": 10137410.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9872727394104004, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03347064306037619, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09668887691176824, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1145.8125, + "completions/mean_terminated_length": 1122.2000732421875, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.05726431607901975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0883882585917597, + "kl": 0.0022335052490234375, + "learning_rate": 5.699999999999999e-07, + "loss": -0.0099, + "num_tokens": 10182311.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0541945695877075, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021799926624772474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07056459660202051, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1326.0, + "completions/mean_terminated_length": 1102.2857666015625, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.05751437859464866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3798004577967635, + "kl": 0.00146484375, + "learning_rate": 5.725e-07, + "loss": -0.0177, + "num_tokens": 10228511.0, + "reward": 0.0, + "reward_std": 1.0345239639282227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0283536844026792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12057732683118884, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1186.4375, + "completions/mean_terminated_length": 998.2999877929688, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.05776444111027757, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.567532346608257, + "kl": 0.002872467041015625, + "learning_rate": 5.749999999999999e-07, + "loss": 0.0753, + "num_tokens": 10279822.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9464213848114014, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16744112366529498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13984059169067636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1088.0, + "completions/max_terminated_length": 1088.0, + "completions/mean_length": 879.5, + "completions/mean_terminated_length": 879.5, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.058014503625906474, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.893992715276337, + "kl": 0.00148773193359375, + "learning_rate": 5.775e-07, + "loss": -0.0061, + "num_tokens": 10326126.0, + "reward": 0.0, + "reward_std": 0.5632733702659607, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1330327902048133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23926458383226806, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 805.5, + "completions/mean_terminated_length": 805.5, + "completions/min_length": 531.0, + "completions/min_terminated_length": 531.0, + "epoch": 0.058264566141535384, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2736431405768136, + "kl": 0.0015773773193359375, + "learning_rate": 5.8e-07, + "loss": -0.0545, + "num_tokens": 10353182.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8199079036712646, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09275096233065785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06387612988139425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1280.9375, + "completions/mean_terminated_length": 1249.6429443359375, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.058514628657164294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.806703826667021, + "kl": 0.00206756591796875, + "learning_rate": 5.825e-07, + "loss": 0.0034, + "num_tokens": 10407285.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0577857494354248, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05108836795899558, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15638550098768525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05163977794943223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 915.6875, + "completions/mean_terminated_length": 876.7333984375, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.0587646911727932, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9095211141405577, + "kl": 0.0005166530609130859, + "learning_rate": 5.849999999999999e-07, + "loss": -0.038, + "num_tokens": 10440088.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9559690952301025, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16225807219523353, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17175817510337857, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1285.25, + "completions/mean_terminated_length": 1156.4000244140625, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.05901475368842211, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.865690856639432, + "kl": 0.0019283294677734375, + "learning_rate": 5.875e-07, + "loss": 0.0295, + "num_tokens": 10486780.0, + "reward": 0.0, + "reward_std": 1.0182406902313232, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05029539771479252, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06529443504665394, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1252.0, + "completions/max_terminated_length": 1252.0, + "completions/mean_length": 1060.375, + "completions/mean_terminated_length": 1060.375, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.05926481620405101, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5267577514624313, + "kl": 0.0012874603271484375, + "learning_rate": 5.9e-07, + "loss": -0.0188, + "num_tokens": 10517842.0, + "reward": 0.0, + "reward_std": 0.5545847415924072, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013448409301274031, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.029373638235102174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12171612389003694, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 950.5625, + "completions/mean_terminated_length": 872.0714721679688, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.05951487871967992, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.674164585278148, + "kl": 0.001476287841796875, + "learning_rate": 5.925e-07, + "loss": -0.0761, + "num_tokens": 10558499.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8916782140731812, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08547643825181922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08100370835486269, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1345.375, + "completions/mean_terminated_length": 1252.5999755859375, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.05976494123530883, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8588111287097537, + "kl": 0.00241851806640625, + "learning_rate": 5.949999999999999e-07, + "loss": 0.0051, + "num_tokens": 10606737.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0034493207931519, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022610326690644025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03704310910046724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04694362260950583, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1311.75, + "completions/mean_terminated_length": 1268.3077392578125, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.06001500375093773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.158081674570873, + "kl": 0.00238037109375, + "learning_rate": 5.975e-07, + "loss": -0.0106, + "num_tokens": 10662989.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.846002459526062, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06325258917784692, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10419841432933279, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1260.1875, + "completions/mean_terminated_length": 1244.2000732421875, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.06026506626656664, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0816530347008824, + "kl": 0.0024547576904296875, + "learning_rate": 6e-07, + "loss": -0.0226, + "num_tokens": 10714008.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7821166515350342, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0052820588453443475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11018378169174999, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818892, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1081.0, + "completions/max_terminated_length": 1081.0, + "completions/mean_length": 894.875, + "completions/mean_terminated_length": 894.875, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.060515128782195546, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8378465084375417, + "kl": 0.002468109130859375, + "learning_rate": 6.025000000000001e-07, + "loss": -0.038, + "num_tokens": 10743902.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0237852334976196, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3486295738137711, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.5832666466871324, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518177, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1082.6875, + "completions/mean_terminated_length": 1082.6875, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.060765191297824456, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.811977916530989, + "kl": 0.0015516281127929688, + "learning_rate": 6.049999999999999e-07, + "loss": -0.0128, + "num_tokens": 10784857.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9129359126091003, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034100366271271824, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07590610559073963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1228.375, + "completions/mean_terminated_length": 1210.2667236328125, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.061015253813453366, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7276450034270074, + "kl": 0.001705169677734375, + "learning_rate": 6.075e-07, + "loss": 0.0108, + "num_tokens": 10832551.0, + "reward": 0.0, + "reward_std": 0.8554657697677612, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07032838081782776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055243674589472526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1261.25, + "completions/mean_terminated_length": 1118.0, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.06126531632908227, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1942748949392676, + "kl": 0.002468109130859375, + "learning_rate": 6.1e-07, + "loss": -0.0468, + "num_tokens": 10889227.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.057142734527588, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004969807227602371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04850885138214973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6499999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1273.4375, + "completions/mean_terminated_length": 1221.1539306640625, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.06151537884471118, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4899131807703387, + "kl": 0.0015316009521484375, + "learning_rate": 6.125000000000001e-07, + "loss": -0.0196, + "num_tokens": 10942426.0, + "reward": 0.0, + "reward_std": 1.0315622091293335, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08428794625761588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06810870030669766, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1121.75, + "completions/mean_terminated_length": 1121.75, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.06176544136034009, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.624957323808172, + "kl": 0.0015163421630859375, + "learning_rate": 6.149999999999999e-07, + "loss": -0.0024, + "num_tokens": 10987318.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0115162134170532, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10626658764023841, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10387139719676511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1112.5, + "completions/mean_terminated_length": 1057.1429443359375, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.06201550387596899, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3977262592727553, + "kl": 0.0011587142944335938, + "learning_rate": 6.175e-07, + "loss": -0.0166, + "num_tokens": 11025934.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0058300495147705, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006013688453187384, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0395963307485169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1146.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 917.125, + "completions/mean_terminated_length": 917.125, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.0622655663915979, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.355271822888768, + "kl": 0.000537872314453125, + "learning_rate": 6.2e-07, + "loss": -0.006, + "num_tokens": 11055328.0, + "reward": 0.0, + "reward_std": 0.864116907119751, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028684348857531756, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19166960921683665, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1295.0625, + "completions/mean_terminated_length": 1172.0999755859375, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.06251562890722681, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5620879463800352, + "kl": 0.001628875732421875, + "learning_rate": 6.225000000000001e-07, + "loss": 0.0357, + "num_tokens": 11099897.0, + "reward": 0.0, + "reward_std": 0.8306336402893066, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011399304748384327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13979533825731738, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1437.8125, + "completions/mean_terminated_length": 1301.0, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.06276569142285571, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.832343358087674, + "kl": 0.002460479736328125, + "learning_rate": 6.249999999999999e-07, + "loss": 0.0192, + "num_tokens": 11148006.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0485721826553345, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006130266508928886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0443087234099108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1010.625, + "completions/mean_terminated_length": 717.0, + "completions/min_length": 433.0, + "completions/min_terminated_length": 433.0, + "epoch": 0.06301575393848462, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2539294233760745, + "kl": 0.0009403228759765625, + "learning_rate": 6.274999999999999e-07, + "loss": -0.1001, + "num_tokens": 11208784.0, + "reward": 0.0, + "reward_std": 0.9543529152870178, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03492247282453282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10726600659282588, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 981.125, + "completions/mean_terminated_length": 946.5333862304688, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.06326581645411353, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.102977749073205, + "kl": 0.0019130706787109375, + "learning_rate": 6.3e-07, + "loss": -0.0193, + "num_tokens": 11241858.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9586024880409241, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03652417115396811, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0504158850407035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05962847939999442, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1076.0, + "completions/mean_terminated_length": 1076.0, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.06351587896974244, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5698469734299776, + "kl": 0.0009469985961914062, + "learning_rate": 6.324999999999999e-07, + "loss": 0.0004, + "num_tokens": 11272490.0, + "reward": 0.0, + "reward_std": 0.5587515234947205, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009511012719339355, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04214859183651974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045224, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1125.5625, + "completions/mean_terminated_length": 1039.1539306640625, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.06376594148537135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.036399391683521, + "kl": 0.00217437744140625, + "learning_rate": 6.35e-07, + "loss": -0.0222, + "num_tokens": 11316099.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0126999616622925, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05722035802239027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2097305731100239, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1146.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 933.5, + "completions/mean_terminated_length": 933.5, + "completions/min_length": 493.0, + "completions/min_terminated_length": 493.0, + "epoch": 0.06401600400100026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4957927827396063, + "kl": 0.0020294189453125, + "learning_rate": 6.374999999999999e-07, + "loss": -0.0348, + "num_tokens": 11361211.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6907597780227661, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0433495404529792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28070774823480305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666665, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1023.5, + "completions/mean_terminated_length": 1023.5, + "completions/min_length": 301.0, + "completions/min_terminated_length": 301.0, + "epoch": 0.06426606651662915, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5430004772870434, + "kl": 0.002620697021484375, + "learning_rate": 6.4e-07, + "loss": -0.0206, + "num_tokens": 11403107.0, + "reward": 0.0, + "reward_std": 0.45779359340667725, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03511369395285535, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1293250257913893, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1210.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 649.8125, + "completions/mean_terminated_length": 649.8125, + "completions/min_length": 276.0, + "completions/min_terminated_length": 276.0, + "epoch": 0.06451612903225806, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4502842365620263, + "kl": 0.0010819435119628906, + "learning_rate": 6.424999999999999e-07, + "loss": 0.1181, + "num_tokens": 11443752.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.3387117385864258, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11584878359516691, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12819166326071618, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1767505042163692, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1072.3125, + "completions/mean_terminated_length": 1072.3125, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.06476619154788697, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.600501791946501, + "kl": 0.0024871826171875, + "learning_rate": 6.45e-07, + "loss": -0.0123, + "num_tokens": 11489493.0, + "reward": 0.0, + "reward_std": 0.6726632118225098, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1096791465369163, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1536459979775206, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1151.9375, + "completions/mean_terminated_length": 993.727294921875, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.06501625406351588, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1286650102336147, + "kl": 0.001117706298828125, + "learning_rate": 6.474999999999999e-07, + "loss": -0.0716, + "num_tokens": 11536532.0, + "reward": 0.0, + "reward_std": 0.9845417737960815, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0031067603617350534, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08154176542224935, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1249.9375, + "completions/mean_terminated_length": 1214.21435546875, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.06526631657914479, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.990946151033602, + "kl": 0.002300262451171875, + "learning_rate": 6.5e-07, + "loss": -0.0063, + "num_tokens": 11580635.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9583099484443665, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08016585009371262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20522712404443513, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1101.375, + "completions/mean_terminated_length": 1009.3846435546875, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.06551637909477369, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.058821842612301, + "kl": 0.001949310302734375, + "learning_rate": 6.524999999999999e-07, + "loss": 0.02, + "num_tokens": 11613849.0, + "reward": 0.0, + "reward_std": 0.702329158782959, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012680776025188863, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06758287070383602, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 990.8125, + "completions/mean_terminated_length": 990.8125, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.0657664416104026, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6120605727102597, + "kl": 0.0011854171752929688, + "learning_rate": 6.55e-07, + "loss": -0.0325, + "num_tokens": 11654022.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0067023038864136, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05495842149787682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09894459356470472, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1062.0625, + "completions/mean_terminated_length": 1062.0625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.06601650412603151, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4983908703024214, + "kl": 0.002307891845703125, + "learning_rate": 6.575e-07, + "loss": -0.0043, + "num_tokens": 11690023.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0064297914505005, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03156833990619317, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057873267698694775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16055459438389727, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1287.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 970.625, + "completions/mean_terminated_length": 970.625, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.06626656664166042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7849445627454084, + "kl": 0.001617431640625, + "learning_rate": 6.6e-07, + "loss": 0.009, + "num_tokens": 11718209.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5248290300369263, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020297764395713007, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23865571092228594, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1092.0, + "completions/mean_terminated_length": 1064.800048828125, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.06651662915728933, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5637021163456346, + "kl": 0.0013856887817382812, + "learning_rate": 6.624999999999999e-07, + "loss": 0.0016, + "num_tokens": 11766089.0, + "reward": 0.0, + "reward_std": 0.8838258385658264, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10245821802969954, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1265083889126653, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1249.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 854.9375, + "completions/mean_terminated_length": 854.9375, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.06676669167291822, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9427591253308374, + "kl": 0.0015392303466796875, + "learning_rate": 6.65e-07, + "loss": -0.0706, + "num_tokens": 11800320.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.904124915599823, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018276466226423864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042708970102993744, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1226.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 1114.875, + "completions/mean_terminated_length": 1114.875, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.06701675418854713, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5733234774488296, + "kl": 0.0003871917724609375, + "learning_rate": 6.675e-07, + "loss": -0.0475, + "num_tokens": 11835598.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8553204536437988, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05429498064069067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13442257286347392, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1128.0625, + "completions/mean_terminated_length": 1128.0625, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.06726681670417604, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7586718568305466, + "kl": 0.003025054931640625, + "learning_rate": 6.7e-07, + "loss": -0.0175, + "num_tokens": 11876351.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9691793918609619, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18159044109329997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2779341783919626, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1113.0625, + "completions/mean_terminated_length": 1087.2667236328125, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.06751687921980495, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.125740609734866, + "kl": 0.0022373199462890625, + "learning_rate": 6.724999999999999e-07, + "loss": -0.0405, + "num_tokens": 11926160.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6015282869338989, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19302369926246749, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1589005330122471, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639732, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1085.4375, + "completions/mean_terminated_length": 1085.4375, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.06776694173543386, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.481181352755889, + "kl": 0.001567840576171875, + "learning_rate": 6.75e-07, + "loss": -0.0276, + "num_tokens": 11970647.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8944178819656372, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11946546619585773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10808708425634056, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1367.75, + "completions/mean_terminated_length": 1235.5, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.06801700425106276, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8739573609515554, + "kl": 0.0013885498046875, + "learning_rate": 6.775e-07, + "loss": -0.0325, + "num_tokens": 12033603.0, + "reward": 0.0, + "reward_std": 0.7130506634712219, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1108193916498551, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17112563160205552, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11021863793455332, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1084.3125, + "completions/mean_terminated_length": 1084.3125, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.06826706676669167, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9999454473777705, + "kl": 0.0007653236389160156, + "learning_rate": 6.800000000000001e-07, + "loss": -0.0043, + "num_tokens": 12070896.0, + "reward": 0.0, + "reward_std": 0.868825376033783, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04356172249671384, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11085815076207377, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1067.125, + "completions/mean_terminated_length": 1038.2667236328125, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.06851712928232058, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7996917097231515, + "kl": 0.002948760986328125, + "learning_rate": 6.824999999999999e-07, + "loss": -0.0558, + "num_tokens": 12120562.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0252115726470947, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.057024397031840035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08133223543956405, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1407.0625, + "completions/mean_terminated_length": 1252.166748046875, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.06876719179794949, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.819359255452125, + "kl": 0.001789093017578125, + "learning_rate": 6.85e-07, + "loss": -0.0175, + "num_tokens": 12170715.0, + "reward": 0.0, + "reward_std": 0.7513598799705505, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019858615808274964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04457437287429445, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1182.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 892.9375, + "completions/mean_terminated_length": 892.9375, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.0690172543135784, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.057675138066892, + "kl": 0.001850128173828125, + "learning_rate": 6.875e-07, + "loss": -0.0158, + "num_tokens": 12209442.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.011250376701355, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017816977445875737, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08634398903762926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1351.9375, + "completions/mean_terminated_length": 1284.6363525390625, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.0692673168292073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.361583283541367, + "kl": 0.0016326904296875, + "learning_rate": 6.9e-07, + "loss": 0.0123, + "num_tokens": 12249809.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0286308526992798, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013938472817476729, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03059370749839712, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1044.75, + "completions/mean_terminated_length": 1044.75, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.0695173793448362, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.052482052780787, + "kl": 0.002025604248046875, + "learning_rate": 6.924999999999999e-07, + "loss": -0.0103, + "num_tokens": 12292437.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8884425163269043, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22026010356828252, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28481708014536833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1519624710005487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1115.5625, + "completions/mean_terminated_length": 1115.5625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.06976744186046512, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.262958536457561, + "kl": 0.0026569366455078125, + "learning_rate": 6.949999999999999e-07, + "loss": -0.0443, + "num_tokens": 12347062.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8817810416221619, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05843696767751686, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1157254461089627, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16095778144410233, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1168.4375, + "completions/mean_terminated_length": 1146.3333740234375, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.07001750437609403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.126581351643466, + "kl": 0.00225067138671875, + "learning_rate": 6.975e-07, + "loss": 0.0671, + "num_tokens": 12389453.0, + "reward": 0.0, + "reward_std": 0.395698606967926, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07554237546472012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.153868096006047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1180.5, + "completions/mean_terminated_length": 1159.2000732421875, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.07026756689172294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8285222259971663, + "kl": 0.0020236968994140625, + "learning_rate": 7e-07, + "loss": -0.0559, + "num_tokens": 12442085.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9814261198043823, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0008941992314898256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04770861561124265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09259629622222519, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1074.9375, + "completions/mean_terminated_length": 1074.9375, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.07051762940735183, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.395295718272769, + "kl": 0.003032684326171875, + "learning_rate": 7.024999999999999e-07, + "loss": 0.0017, + "num_tokens": 12488668.0, + "reward": 0.0, + "reward_std": 0.9231893420219421, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027643715163394963, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08541738436101692, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1173.3125, + "completions/mean_terminated_length": 1151.533447265625, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.07076769192298074, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.558546070857536, + "kl": 0.001399993896484375, + "learning_rate": 7.049999999999999e-07, + "loss": 0.0493, + "num_tokens": 12544793.0, + "reward": 0.0, + "reward_std": 0.8663690090179443, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07163557719159587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08398576705289333, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1002.5, + "completions/mean_terminated_length": 1002.5, + "completions/min_length": 500.0, + "completions/min_terminated_length": 500.0, + "epoch": 0.07101775443860965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2833533602148686, + "kl": 0.0020923614501953125, + "learning_rate": 7.075e-07, + "loss": -0.1008, + "num_tokens": 12592825.0, + "reward": 0.0, + "reward_std": 0.6071997880935669, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3428568265897377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.6569867230361525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1180.9375, + "completions/mean_terminated_length": 1035.9091796875, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.07126781695423856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.480266628326831, + "kl": 0.0015401840209960938, + "learning_rate": 7.1e-07, + "loss": 0.0233, + "num_tokens": 12638432.0, + "reward": 0.0, + "reward_std": 0.7625324726104736, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09114665847302011, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11135519284845713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1047.375, + "completions/mean_terminated_length": 1047.375, + "completions/min_length": 506.0, + "completions/min_terminated_length": 506.0, + "epoch": 0.07151787946986747, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3668386685314786, + "kl": 0.0021762847900390625, + "learning_rate": 7.125e-07, + "loss": -0.0169, + "num_tokens": 12678382.0, + "reward": 0.0, + "reward_std": 1.0190746784210205, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017188595679654403, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03641719873688553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1258.75, + "completions/mean_terminated_length": 1178.3333740234375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.07176794198549638, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0447466153030547, + "kl": 0.002651214599609375, + "learning_rate": 7.149999999999999e-07, + "loss": -0.0735, + "num_tokens": 12735850.0, + "reward": 0.0, + "reward_std": 0.8971014618873596, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0029448365732882716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1173658968369943, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1158.375, + "completions/mean_terminated_length": 1135.60009765625, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.07201800450112528, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.949199351158853, + "kl": 0.002254486083984375, + "learning_rate": 7.175e-07, + "loss": -0.0476, + "num_tokens": 12780896.0, + "reward": 0.0, + "reward_std": 0.9538476467132568, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038971442580359866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22248888911330741, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 981.0, + "completions/max_terminated_length": 981.0, + "completions/mean_length": 748.0625, + "completions/mean_terminated_length": 748.0625, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.07226806701675419, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7421629498577853, + "kl": 0.0005278587341308594, + "learning_rate": 7.2e-07, + "loss": 0.0207, + "num_tokens": 12808993.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9267148375511169, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.115115519404858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15820119598411322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457556, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1328.4375, + "completions/mean_terminated_length": 1271.25, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.0725181295323831, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8064079386948997, + "kl": 0.0021820068359375, + "learning_rate": 7.225e-07, + "loss": 0.0703, + "num_tokens": 12868232.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9796990156173706, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018506586328858782, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19966599675627186, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1130.25, + "completions/mean_terminated_length": 1130.25, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.072768192048012, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4150201419799755, + "kl": 0.0015707015991210938, + "learning_rate": 7.249999999999999e-07, + "loss": -0.0036, + "num_tokens": 12912660.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.96712327003479, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046138315434293614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10659843837281545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1202.5, + "completions/mean_terminated_length": 1133.84619140625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.07301825456364092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.524192500616819, + "kl": 0.0018463134765625, + "learning_rate": 7.275e-07, + "loss": -0.0006, + "num_tokens": 12962372.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6025065779685974, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04452348800585452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23725677038240847, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1198.0625, + "completions/mean_terminated_length": 1177.933349609375, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.07326831707926981, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2931131557163593, + "kl": 0.002475738525390625, + "learning_rate": 7.3e-07, + "loss": -0.0061, + "num_tokens": 13008501.0, + "reward": 0.0, + "reward_std": 0.8736509084701538, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03790984309528585, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05965534243742646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1344.9375, + "completions/mean_terminated_length": 1251.9000244140625, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.07351837959489872, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4734191218691826, + "kl": 0.0019664764404296875, + "learning_rate": 7.325e-07, + "loss": 0.0079, + "num_tokens": 13057828.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9454586505889893, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06507817522641411, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09241118382597226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1100.375, + "completions/mean_terminated_length": 1100.375, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.07376844211052763, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.038036839604, + "kl": 0.0023899078369140625, + "learning_rate": 7.35e-07, + "loss": 0.0143, + "num_tokens": 13103754.0, + "reward": 0.0, + "reward_std": 0.8434479832649231, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.053762377622439225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09125755600278293, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15299479536052008, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1150.8125, + "completions/mean_terminated_length": 1150.8125, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.07401850462615654, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0685375034483746, + "kl": 0.0020503997802734375, + "learning_rate": 7.375e-07, + "loss": -0.0051, + "num_tokens": 13159199.0, + "reward": 0.0, + "reward_std": 0.9001356363296509, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005181376225364255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12452512216974775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1492.875, + "completions/mean_terminated_length": 1462.0, + "completions/min_length": 1415.0, + "completions/min_terminated_length": 1415.0, + "epoch": 0.07426856714178545, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8733754958004836, + "kl": 0.002521514892578125, + "learning_rate": 7.4e-07, + "loss": 0.0027, + "num_tokens": 13225733.0, + "reward": 0.0, + "reward_std": 1.0578339099884033, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16663837864491687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12428585484645702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125751, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 827.625, + "completions/mean_terminated_length": 827.625, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.07451862965741435, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6172101857742094, + "kl": 0.001758575439453125, + "learning_rate": 7.425e-07, + "loss": -0.0263, + "num_tokens": 13264111.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6918116807937622, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10721104265913076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09002126983736299, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1125.3125, + "completions/mean_terminated_length": 1000.4166870117188, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.07476869217304326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5389324019926414, + "kl": 0.003154754638671875, + "learning_rate": 7.45e-07, + "loss": 0.0356, + "num_tokens": 13308780.0, + "reward": 0.0, + "reward_std": 0.8233329057693481, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852974, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1121.4375, + "completions/mean_terminated_length": 1121.4375, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.07501875468867217, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.304716004148669, + "kl": 0.00246429443359375, + "learning_rate": 7.475e-07, + "loss": 0.0179, + "num_tokens": 13357659.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8861004114151001, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0023570908932258414, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1211039485992521, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1042.25, + "completions/mean_terminated_length": 936.6154174804688, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.07526881720430108, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5485258366070815, + "kl": 0.0020999908447265625, + "learning_rate": 7.5e-07, + "loss": -0.0623, + "num_tokens": 13409767.0, + "reward": 0.0, + "reward_std": 0.7425888776779175, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07222190489080155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07977127139181157, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1256.8125, + "completions/mean_terminated_length": 1222.071533203125, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.07551887971992999, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7034280481736124, + "kl": 0.001636505126953125, + "learning_rate": 7.524999999999999e-07, + "loss": -0.0219, + "num_tokens": 13464004.0, + "reward": 0.0, + "reward_std": 0.9931418299674988, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10218197079984964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10867400300606088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042255, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1263.125, + "completions/mean_terminated_length": 1184.166748046875, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.07576894223555888, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.82419466392937, + "kl": 0.0019636154174804688, + "learning_rate": 7.55e-07, + "loss": -0.0243, + "num_tokens": 13514718.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8591594099998474, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20725042185949571, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17933860689149245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1061096567672295, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1145.0, + "completions/max_terminated_length": 1145.0, + "completions/mean_length": 988.5, + "completions/mean_terminated_length": 988.5, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.0760190047511878, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.044253867661982, + "kl": 0.0016145706176757812, + "learning_rate": 7.575e-07, + "loss": -0.0275, + "num_tokens": 13559222.0, + "reward": 0.0, + "reward_std": 0.4617592990398407, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.037052493754070076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11325164445405858, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15770342536029575, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1108.25, + "completions/mean_terminated_length": 1108.25, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.0762690672668167, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.448655382575357, + "kl": 0.0013818740844726562, + "learning_rate": 7.599999999999999e-07, + "loss": 0.0024, + "num_tokens": 13596858.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9393701553344727, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03958701664442946, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062493716984334646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1249.1875, + "completions/mean_terminated_length": 1232.4666748046875, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.07651912978244561, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4523334890228496, + "kl": 0.00299835205078125, + "learning_rate": 7.624999999999999e-07, + "loss": 0.019, + "num_tokens": 13633157.0, + "reward": 0.0, + "reward_std": 0.9702248573303223, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012760892845286333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02816278593881227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.24278476979506858, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1341.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1153.625, + "completions/mean_terminated_length": 1153.625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.07676919229807452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3728724235131273, + "kl": 0.002376556396484375, + "learning_rate": 7.65e-07, + "loss": 0.0211, + "num_tokens": 13673847.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8898781538009644, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13326094944067918, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05885076374041348, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1108.0, + "completions/mean_terminated_length": 1108.0, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.07701925481370342, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5703715397531255, + "kl": 0.00151824951171875, + "learning_rate": 7.675e-07, + "loss": 0.0024, + "num_tokens": 13717103.0, + "reward": 0.0, + "reward_std": 1.0142549276351929, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005675689037163896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09202039331057663, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 853.3125, + "completions/mean_terminated_length": 810.2000122070312, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.07726931732933233, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.876164853218412, + "kl": 0.0032958984375, + "learning_rate": 7.699999999999999e-07, + "loss": -0.0653, + "num_tokens": 13757692.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0442941188812256, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09181096082066341, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1438423548521973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1199.4375, + "completions/mean_terminated_length": 1019.1000366210938, + "completions/min_length": 663.0, + "completions/min_terminated_length": 663.0, + "epoch": 0.07751937984496124, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.731075809106309, + "kl": 0.0015163421630859375, + "learning_rate": 7.724999999999999e-07, + "loss": 0.0396, + "num_tokens": 13798227.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9579081535339355, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.032908407772239974, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056083110184510385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1220.75, + "completions/mean_terminated_length": 1156.3077392578125, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.07776944236059015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9106815475404613, + "kl": 0.0017490386962890625, + "learning_rate": 7.75e-07, + "loss": -0.0254, + "num_tokens": 13853423.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.042161464691162, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07120444905319084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06812358717075977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1122.0, + "completions/mean_terminated_length": 1034.769287109375, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.07801950487621906, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3723664869213272, + "kl": 0.00154876708984375, + "learning_rate": 7.775e-07, + "loss": 0.0225, + "num_tokens": 13886783.0, + "reward": 0.0, + "reward_std": 0.8425365090370178, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03600875407719076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07563825766680966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1013.3125, + "completions/mean_terminated_length": 1013.3125, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.07826956739184796, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7362058482230833, + "kl": 0.002788543701171875, + "learning_rate": 7.799999999999999e-07, + "loss": -0.0163, + "num_tokens": 13927060.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9636948108673096, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016529971853632308, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03453559976453091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 981.5, + "completions/mean_terminated_length": 981.5, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.07851962990747686, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1031852507110265, + "kl": 0.0015773773193359375, + "learning_rate": 7.824999999999999e-07, + "loss": -0.0099, + "num_tokens": 13965644.0, + "reward": 0.0, + "reward_std": 0.8738616108894348, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030122596688022446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03194335535391865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1348.75, + "completions/mean_terminated_length": 1096.666748046875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.07876969242310577, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2293750297793786, + "kl": 0.00272369384765625, + "learning_rate": 7.85e-07, + "loss": -0.0101, + "num_tokens": 14013464.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.04903244972229, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06102688636719043, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11626404179764524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1047.625, + "completions/mean_terminated_length": 1047.625, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.07901975493873468, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.578535635619387, + "kl": 0.0025787353515625, + "learning_rate": 7.875e-07, + "loss": -0.0491, + "num_tokens": 14055346.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6440032720565796, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045457486603114675, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05151398535755129, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1792473978322409, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1001.375, + "completions/mean_terminated_length": 886.3077392578125, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.0792698174543636, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.648400252267846, + "kl": 0.002716064453125, + "learning_rate": 7.9e-07, + "loss": -0.0425, + "num_tokens": 14098920.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9862943887710571, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.21527701446938932, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15323918967053549, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1196.0, + "completions/max_terminated_length": 1196.0, + "completions/mean_length": 1011.0, + "completions/mean_terminated_length": 1011.0, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.0795198799699925, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2419490932321615, + "kl": 0.0016880035400390625, + "learning_rate": 7.924999999999999e-07, + "loss": -0.03, + "num_tokens": 14149960.0, + "reward": 0.0, + "reward_std": 0.840920090675354, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17726251562603354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11803772101374332, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1340.0625, + "completions/mean_terminated_length": 1286.75, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.0797699424856214, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.646781792424534, + "kl": 0.0018825531005859375, + "learning_rate": 7.95e-07, + "loss": -0.0489, + "num_tokens": 14197233.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9562281966209412, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09456604176685181, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10946838824528554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1220.0625, + "completions/mean_terminated_length": 1126.75, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.08002000500125031, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0340114467425328, + "kl": 0.002162933349609375, + "learning_rate": 7.975e-07, + "loss": 0.0197, + "num_tokens": 14249946.0, + "reward": 0.0, + "reward_std": 1.044987440109253, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06618808301038787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06365952726772346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1170.625, + "completions/mean_terminated_length": 1170.625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.08027006751687922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0492086474434723, + "kl": 0.0019989013671875, + "learning_rate": 8e-07, + "loss": -0.0125, + "num_tokens": 14290388.0, + "reward": 0.0, + "reward_std": 0.9445364475250244, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06574314745388786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056918691981295635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1143.9375, + "completions/mean_terminated_length": 1120.2000732421875, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.08052013003250813, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.782074467551011, + "kl": 0.001148223876953125, + "learning_rate": 8.024999999999999e-07, + "loss": 0.0047, + "num_tokens": 14329075.0, + "reward": -1.6763806343078613e-08, + "reward_std": 1.0339581966400146, + "rewards/wordcountpos_reward_GEOBench/mean": -1.6763806343078613e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018951548252913175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08903165395009896, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1265.125, + "completions/mean_terminated_length": 1158.3636474609375, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.08077019254813704, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6894629812725404, + "kl": 0.0016269683837890625, + "learning_rate": 8.05e-07, + "loss": -0.0479, + "num_tokens": 14380381.0, + "reward": 0.0, + "reward_std": 0.866814374923706, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20501929877948527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1159059586831605, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1212.875, + "completions/mean_terminated_length": 1171.857177734375, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.08102025506376594, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.419083515090992, + "kl": 0.0014539361000061035, + "learning_rate": 8.075e-07, + "loss": -0.0097, + "num_tokens": 14427179.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0277795791625977, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022680328708815162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08110772910152395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1414.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1159.25, + "completions/mean_terminated_length": 1159.25, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.08127031757939485, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1326798733024352, + "kl": 0.0010113716125488281, + "learning_rate": 8.1e-07, + "loss": -0.0193, + "num_tokens": 14480583.0, + "reward": 0.0, + "reward_std": 0.8790631294250488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07761516576743321, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12295262580936234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 984.375, + "completions/mean_terminated_length": 984.375, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.08152038009502376, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1101518433830493, + "kl": 0.001667022705078125, + "learning_rate": 8.125e-07, + "loss": -0.0418, + "num_tokens": 14517445.0, + "reward": 0.0, + "reward_std": 0.4544769525527954, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026862744200623262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11551283797062638, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1446.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 879.9375, + "completions/mean_terminated_length": 879.9375, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.08177044261065267, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.124457258958046, + "kl": 0.002960205078125, + "learning_rate": 8.149999999999999e-07, + "loss": -0.0702, + "num_tokens": 14554724.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0233392715454102, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012813465699783052, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06010180445369851, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1145.0, + "completions/max_terminated_length": 1145.0, + "completions/mean_length": 865.4375, + "completions/mean_terminated_length": 865.4375, + "completions/min_length": 426.0, + "completions/min_terminated_length": 426.0, + "epoch": 0.08202050512628158, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.788058776036395, + "kl": 0.002574920654296875, + "learning_rate": 8.175e-07, + "loss": -0.0635, + "num_tokens": 14600747.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.729394793510437, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029240568099167044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1631815279759322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572018, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 964.3125, + "completions/mean_terminated_length": 964.3125, + "completions/min_length": 583.0, + "completions/min_terminated_length": 583.0, + "epoch": 0.08227056764191047, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.053636456558199, + "kl": 0.003353118896484375, + "learning_rate": 8.199999999999999e-07, + "loss": 0.0148, + "num_tokens": 14649536.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.065124273300171, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1405811390291942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11410832804365581, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1154.5625, + "completions/mean_terminated_length": 1131.533447265625, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.08252063015753938, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.066423892279013, + "kl": 0.0016117095947265625, + "learning_rate": 8.225e-07, + "loss": 0.0449, + "num_tokens": 14696881.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5638798475265503, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013789616391158242, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1271429450854783, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1258.5625, + "completions/mean_terminated_length": 1242.4666748046875, + "completions/min_length": 1131.0, + "completions/min_terminated_length": 1131.0, + "epoch": 0.08277069267316829, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.731100236355856, + "kl": 0.0019474029541015625, + "learning_rate": 8.249999999999999e-07, + "loss": -0.0087, + "num_tokens": 14745530.0, + "reward": 0.0, + "reward_std": 1.0346918106079102, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007271793780006683, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03955132073965452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1196.75, + "completions/mean_terminated_length": 1095.666748046875, + "completions/min_length": 445.0, + "completions/min_terminated_length": 445.0, + "epoch": 0.0830207551887972, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5276483612503817, + "kl": 0.001491546630859375, + "learning_rate": 8.275e-07, + "loss": 0.0066, + "num_tokens": 14788286.0, + "reward": 0.0, + "reward_std": 0.9156057238578796, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03361206228221996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039903666382939726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1181.375, + "completions/mean_terminated_length": 1181.375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.08327081770442611, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.721280967218134, + "kl": 0.00164794921875, + "learning_rate": 8.299999999999999e-07, + "loss": -0.0128, + "num_tokens": 14840124.0, + "reward": 0.0, + "reward_std": 0.4925474226474762, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021740438199963255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2487995964006736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1312.875, + "completions/mean_terminated_length": 1312.875, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.08352088022005501, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.471917141930717, + "kl": 0.001712799072265625, + "learning_rate": 8.325e-07, + "loss": 0.0088, + "num_tokens": 14886858.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0064525604248047, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07363148831313289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06593763458724879, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1105.375, + "completions/mean_terminated_length": 1079.0667724609375, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.08377094273568392, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.657325044203086, + "kl": 0.0028839111328125, + "learning_rate": 8.349999999999999e-07, + "loss": 0.0358, + "num_tokens": 14937776.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6808573007583618, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10260058312591434, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15607972134537035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1149.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 830.625, + "completions/mean_terminated_length": 830.625, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.08402100525131283, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.457362747334063, + "kl": 0.002925872802734375, + "learning_rate": 8.375e-07, + "loss": -0.0048, + "num_tokens": 14973554.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.045867681503296, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00509603431457704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0370504854910776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.2057956555708887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 737.0, + "completions/mean_terminated_length": 737.0, + "completions/min_length": 369.0, + "completions/min_terminated_length": 369.0, + "epoch": 0.08427106776694174, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.500381560439774, + "kl": 0.0024471282958984375, + "learning_rate": 8.399999999999999e-07, + "loss": -0.1564, + "num_tokens": 15009874.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9776280522346497, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07083162363907806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1969096474711674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923409, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1107.0, + "completions/mean_length": 1208.0625, + "completions/mean_terminated_length": 916.125, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.08452113028257065, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0383310065767586, + "kl": 0.002216339111328125, + "learning_rate": 8.425e-07, + "loss": 0.0105, + "num_tokens": 15054035.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0197306871414185, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05254282292101622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08153996768204855, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1076.375, + "completions/mean_terminated_length": 978.6154174804688, + "completions/min_length": 505.0, + "completions/min_terminated_length": 505.0, + "epoch": 0.08477119279819954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2078141284159267, + "kl": 0.00196075439453125, + "learning_rate": 8.45e-07, + "loss": 0.0444, + "num_tokens": 15114625.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6249406933784485, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040890047105607896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09649390361430946, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1170.4375, + "completions/mean_terminated_length": 1123.357177734375, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.08502125531382845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.013889923136429, + "kl": 0.0025634765625, + "learning_rate": 8.475e-07, + "loss": -0.031, + "num_tokens": 15156112.0, + "reward": 0.0, + "reward_std": 1.0478525161743164, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047044755789782684, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07901063313760887, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1037.4375, + "completions/mean_terminated_length": 1037.4375, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.08527131782945736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6846856428687116, + "kl": 0.002826690673828125, + "learning_rate": 8.499999999999999e-07, + "loss": -0.0431, + "num_tokens": 15192127.0, + "reward": 0.0, + "reward_std": 0.5947579145431519, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0322272668903456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031166029195983973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15438048235879215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1189.25, + "completions/mean_terminated_length": 1048.0, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.08552138034508627, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.879997127827658, + "kl": 0.002227783203125, + "learning_rate": 8.525e-07, + "loss": -0.0207, + "num_tokens": 15239395.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9360876083374023, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01692125990733046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0752426363814972, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1142.625, + "completions/mean_terminated_length": 1142.625, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.08577144286071518, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6637703542809996, + "kl": 0.001811981201171875, + "learning_rate": 8.55e-07, + "loss": -0.0266, + "num_tokens": 15290853.0, + "reward": 0.0, + "reward_std": 0.6429154872894287, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04411794436571727, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05861794475122207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 960.0625, + "completions/mean_terminated_length": 960.0625, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.08602150537634409, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3885640296301225, + "kl": 0.002086639404296875, + "learning_rate": 8.575e-07, + "loss": -0.0155, + "num_tokens": 15343766.0, + "reward": 0.0, + "reward_std": 0.6706966757774353, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023512815067119244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1240625418169997, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1143.25, + "completions/mean_terminated_length": 1092.2857666015625, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.08627156789197299, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.271318334235451, + "kl": 0.00270843505859375, + "learning_rate": 8.599999999999999e-07, + "loss": -0.0382, + "num_tokens": 15387554.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7541489005088806, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1204.8125, + "completions/mean_terminated_length": 1136.6923828125, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.0865216304076019, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4140790554511353, + "kl": 0.0030364990234375, + "learning_rate": 8.625e-07, + "loss": -0.0782, + "num_tokens": 15439959.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9763940572738647, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011346011776908752, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0539521093254643, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1459.5625, + "completions/mean_terminated_length": 1284.3333740234375, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.08677169292323081, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3251525741007653, + "kl": 0.0013446807861328125, + "learning_rate": 8.65e-07, + "loss": 0.0148, + "num_tokens": 15504952.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7711533308029175, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13793615366600281, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1010391604190239, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 875.6875, + "completions/mean_terminated_length": 875.6875, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.08702175543885972, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.007886244849441, + "kl": 0.00292205810546875, + "learning_rate": 8.675000000000001e-07, + "loss": 0.0147, + "num_tokens": 15544483.0, + "reward": 0.0, + "reward_std": 0.6377196907997131, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01458356049186296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20152274104018866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1258.625, + "completions/mean_terminated_length": 1178.166748046875, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.08727181795448863, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3859166732117125, + "kl": 0.0017757415771484375, + "learning_rate": 8.699999999999999e-07, + "loss": -0.04, + "num_tokens": 15594933.0, + "reward": 0.0, + "reward_std": 0.8141038417816162, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033333775716339734, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.139469304928335, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1175.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 933.3125, + "completions/mean_terminated_length": 933.3125, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.08752188047011752, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6549895796317693, + "kl": 0.00151824951171875, + "learning_rate": 8.725e-07, + "loss": -0.0075, + "num_tokens": 15628474.0, + "reward": 0.0, + "reward_std": 1.0287435054779053, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01987649930255723, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057484924316988886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1162.8125, + "completions/mean_terminated_length": 1140.3333740234375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.08777194298574643, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.456860848787908, + "kl": 0.002666473388671875, + "learning_rate": 8.75e-07, + "loss": 0.0232, + "num_tokens": 15676071.0, + "reward": 0.0, + "reward_std": 0.9965443015098572, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06761781682440769, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07672338111852664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1279.0, + "completions/mean_length": 1073.6875, + "completions/mean_terminated_length": 1045.2667236328125, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.08802200550137534, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7229335215099586, + "kl": 0.0019016265869140625, + "learning_rate": 8.774999999999999e-07, + "loss": 0.0141, + "num_tokens": 15713738.0, + "reward": 0.0, + "reward_std": 0.8237835764884949, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10832995271895104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08363471724937753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6499999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1071.0, + "completions/mean_length": 854.4375, + "completions/mean_terminated_length": 811.4000244140625, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.08827206801700425, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.82612182497941, + "kl": 0.0016384124755859375, + "learning_rate": 8.799999999999999e-07, + "loss": -0.0069, + "num_tokens": 15751129.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.025823712348938, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006081329252532072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3845502261144711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 961.1875, + "completions/mean_terminated_length": 961.1875, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.08852213053263316, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5956958091128846, + "kl": 0.0005013942718505859, + "learning_rate": 8.824999999999999e-07, + "loss": -0.025, + "num_tokens": 15788404.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6354619264602661, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035296019318860894, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10086396151265722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578048, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1084.75, + "completions/mean_terminated_length": 1025.4285888671875, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.08877219304826206, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.275262603665347, + "kl": 0.00238037109375, + "learning_rate": 8.85e-07, + "loss": -0.0783, + "num_tokens": 15837752.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.8844678401947021, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026889790393115565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15748308049610446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1230.8125, + "completions/mean_terminated_length": 1212.86669921875, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.08902225556389097, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4236852233842883, + "kl": 0.002964019775390625, + "learning_rate": 8.874999999999999e-07, + "loss": -0.0321, + "num_tokens": 15887717.0, + "reward": 0.0, + "reward_std": 0.8186800479888916, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09751195195141989, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10299106314597761, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1123.75, + "completions/mean_terminated_length": 1123.75, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.08927231807951988, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3344527820292704, + "kl": 0.0025005340576171875, + "learning_rate": 8.9e-07, + "loss": -0.0323, + "num_tokens": 15938865.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0575263500213623, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028241482308480698, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06927080688788102, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1282.625, + "completions/mean_terminated_length": 1065.25, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.08952238059514879, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1916384165203096, + "kl": 0.0012950897216796875, + "learning_rate": 8.924999999999999e-07, + "loss": 0.0194, + "num_tokens": 15979915.0, + "reward": 0.0, + "reward_std": 0.5970163345336914, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23811094342357525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3434041585851836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13924399049470285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 845.25, + "completions/mean_terminated_length": 845.25, + "completions/min_length": 487.0, + "completions/min_terminated_length": 487.0, + "epoch": 0.0897724431107777, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.539805557723218, + "kl": 0.001262664794921875, + "learning_rate": 8.95e-07, + "loss": -0.0784, + "num_tokens": 16030511.0, + "reward": 0.0, + "reward_std": 1.0151575803756714, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011466624827418361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0799538205844789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1193.25, + "completions/mean_terminated_length": 1172.800048828125, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.0900225056264066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6776349757321594, + "kl": 0.001911163330078125, + "learning_rate": 8.974999999999999e-07, + "loss": -0.0016, + "num_tokens": 16077875.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.006119728088379, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03783256767033636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0696421162927018, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 1112.5, + "completions/mean_terminated_length": 1112.5, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.0902725681420355, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.418757246659356, + "kl": 0.002872467041015625, + "learning_rate": 9e-07, + "loss": -0.0055, + "num_tokens": 16127251.0, + "reward": 0.0, + "reward_std": 0.8976947069168091, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026010613678077783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1512811014360346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1064.875, + "completions/mean_terminated_length": 1064.875, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.09052263065766442, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.572249012711395, + "kl": 0.0013608932495117188, + "learning_rate": 9.024999999999999e-07, + "loss": 0.03, + "num_tokens": 16177265.0, + "reward": 0.0, + "reward_std": 0.949448823928833, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03956199710969971, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.107483485991424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04554200340426487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1241.6875, + "completions/mean_terminated_length": 1204.7857666015625, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.09077269317329333, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3453470216331467, + "kl": 0.0017337799072265625, + "learning_rate": 9.05e-07, + "loss": 0.0193, + "num_tokens": 16227212.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7464134693145752, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08666050871030406, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09618027294540046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1064.9375, + "completions/mean_terminated_length": 1064.9375, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.09102275568892224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8141947926334487, + "kl": 0.003021240234375, + "learning_rate": 9.074999999999999e-07, + "loss": 0.0226, + "num_tokens": 16276819.0, + "reward": 0.0, + "reward_std": 0.9808779954910278, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01621201628227087, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04545424576578614, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 1066.8125, + "completions/mean_terminated_length": 1066.8125, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.09127281820455113, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5881630549561483, + "kl": 0.0015544891357421875, + "learning_rate": 9.1e-07, + "loss": 0.0075, + "num_tokens": 16313760.0, + "reward": 0.0, + "reward_std": 0.8433870077133179, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023190679482383094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05926190696244798, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1114.1875, + "completions/mean_terminated_length": 1088.4666748046875, + "completions/min_length": 638.0, + "completions/min_terminated_length": 638.0, + "epoch": 0.09152288072018004, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.843613032041693, + "kl": 0.001979827880859375, + "learning_rate": 9.124999999999999e-07, + "loss": -0.0177, + "num_tokens": 16369083.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.8985847234725952, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003114196200683504, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04041457836877238, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1057.75, + "completions/mean_terminated_length": 1057.75, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.09177294323580895, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.38083256340822, + "kl": 0.001346588134765625, + "learning_rate": 9.15e-07, + "loss": 0.0064, + "num_tokens": 16409223.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9797626733779907, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008703855575430708, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11274286193191758, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1239.9375, + "completions/mean_terminated_length": 1083.9000244140625, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.09202300575143786, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9000771543562895, + "kl": 0.002178192138671875, + "learning_rate": 9.174999999999999e-07, + "loss": -0.0338, + "num_tokens": 16458734.0, + "reward": 0.0, + "reward_std": 0.4562618136405945, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016211265101017225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02118798019994677, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1437590576856522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1181.1875, + "completions/mean_terminated_length": 933.2222290039062, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.09227306826706677, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.944781899877691, + "kl": 0.0017719268798828125, + "learning_rate": 9.2e-07, + "loss": -0.0013, + "num_tokens": 16502417.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0247652530670166, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.047107164733465594, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09262227782935406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1114.375, + "completions/mean_terminated_length": 985.8333740234375, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.09252313078269567, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3497884810460588, + "kl": 0.0016574859619140625, + "learning_rate": 9.225e-07, + "loss": 0.0136, + "num_tokens": 16544463.0, + "reward": 0.0, + "reward_std": 0.8893084526062012, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1064653629661812, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12193229114324064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1048.4375, + "completions/mean_terminated_length": 1048.4375, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.09277319329832458, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7332698397284756, + "kl": 0.00174713134765625, + "learning_rate": 9.25e-07, + "loss": -0.031, + "num_tokens": 16581438.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0379033088684082, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20118965684171258, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16136456717809733, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081414, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1050.0, + "completions/mean_terminated_length": 845.45458984375, + "completions/min_length": 506.0, + "completions/min_terminated_length": 506.0, + "epoch": 0.09302325581395349, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5619152316900644, + "kl": 0.003368377685546875, + "learning_rate": 9.274999999999999e-07, + "loss": -0.0051, + "num_tokens": 16636198.0, + "reward": 0.0, + "reward_std": 0.6232237815856934, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10996561534480079, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08895653684078475, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1024.5625, + "completions/mean_terminated_length": 1024.5625, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.0932733183295824, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.923727283382486, + "kl": 0.003509521484375, + "learning_rate": 9.3e-07, + "loss": 0.0003, + "num_tokens": 16695871.0, + "reward": 0.0, + "reward_std": 0.9178142547607422, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14205963128406054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16179042475424904, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1160.0, + "completions/mean_terminated_length": 1137.3333740234375, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.0935233808452113, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4961573773684522, + "kl": 0.0034637451171875, + "learning_rate": 9.325e-07, + "loss": 0.0353, + "num_tokens": 16745199.0, + "reward": 0.0, + "reward_std": 0.8547050952911377, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02748821837994313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1207604481757701, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1115.3125, + "completions/mean_terminated_length": 1115.3125, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.09377344336084022, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3828583236544403, + "kl": 0.001560211181640625, + "learning_rate": 9.35e-07, + "loss": -0.0347, + "num_tokens": 16790876.0, + "reward": 0.0, + "reward_std": 0.7253067493438721, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019862116666040486, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07865193217374973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1334.3125, + "completions/mean_terminated_length": 1279.0833740234375, + "completions/min_length": 1070.0, + "completions/min_terminated_length": 1070.0, + "epoch": 0.09402350587646911, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.787089353002443, + "kl": 0.0023651123046875, + "learning_rate": 9.374999999999999e-07, + "loss": -0.0247, + "num_tokens": 16833593.0, + "reward": 0.0, + "reward_std": 0.9819654226303101, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010667703092463997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0911867559952063, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1044.1875, + "completions/mean_terminated_length": 1044.1875, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.09427356839209802, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8960762018207915, + "kl": 0.00218963623046875, + "learning_rate": 9.399999999999999e-07, + "loss": -0.0084, + "num_tokens": 16873436.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.004173755645752, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08696517305653965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09773497449272187, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1092.3125, + "completions/mean_terminated_length": 998.2308349609375, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.09452363090772693, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2533668206610535, + "kl": 0.002765655517578125, + "learning_rate": 9.425e-07, + "loss": -0.0107, + "num_tokens": 16916153.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.999409556388855, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014483176260501806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07512363294670318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0596284793999944, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1435.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1091.875, + "completions/mean_terminated_length": 1091.875, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.09477369342335584, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6228233772405725, + "kl": 0.003269195556640625, + "learning_rate": 9.45e-07, + "loss": -0.0203, + "num_tokens": 16958415.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9559187889099121, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010419095087095695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04256636558621845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1158.875, + "completions/mean_terminated_length": 1110.1429443359375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.09502375593898475, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2083837055816535, + "kl": 0.0015716552734375, + "learning_rate": 9.474999999999999e-07, + "loss": -0.0304, + "num_tokens": 17007741.0, + "reward": 0.0, + "reward_std": 0.7496041059494019, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018414941701545024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1003358402764829, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1027.5, + "completions/mean_terminated_length": 1027.5, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.09527381845461365, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.178858189356902, + "kl": 0.003543853759765625, + "learning_rate": 9.499999999999999e-07, + "loss": 0.0355, + "num_tokens": 17049557.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7808666229248047, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21727707609081928, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23746863403295362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1210.9375, + "completions/mean_terminated_length": 1191.666748046875, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.09552388097024256, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2848615609347074, + "kl": 0.002841949462890625, + "learning_rate": 9.525e-07, + "loss": -0.0516, + "num_tokens": 17093524.0, + "reward": 0.0, + "reward_std": 0.8354849219322205, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020615506273431117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11652392509010803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590965, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1130.8125, + "completions/mean_terminated_length": 1130.8125, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.09577394348587147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2277778821928145, + "kl": 0.00193023681640625, + "learning_rate": 9.55e-07, + "loss": 0.0574, + "num_tokens": 17139129.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9785068035125732, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04092699875766233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07819233403178678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1206.0625, + "completions/mean_terminated_length": 1206.0625, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.09602400600150038, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.823904268674177, + "kl": 0.0020122528076171875, + "learning_rate": 9.575e-07, + "loss": 0.0118, + "num_tokens": 17185938.0, + "reward": 0.0, + "reward_std": 0.876777708530426, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2182454411719508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07295214407825079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390615, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1302.4375, + "completions/mean_terminated_length": 1289.2667236328125, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.09627406851712929, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0608184869291213, + "kl": 0.002780914306640625, + "learning_rate": 9.6e-07, + "loss": -0.0169, + "num_tokens": 17239801.0, + "reward": 0.0, + "reward_std": 0.7821117639541626, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12842025058217382, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1797719699799621, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1145.375, + "completions/mean_terminated_length": 1121.7333984375, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.09652413103275818, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.458684423607668, + "kl": 0.0028076171875, + "learning_rate": 9.624999999999999e-07, + "loss": -0.0021, + "num_tokens": 17281599.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0083962678909302, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010084016110852339, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06189629073372294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1226.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 1062.0625, + "completions/mean_terminated_length": 1062.0625, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.0967741935483871, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4830176315164634, + "kl": 0.003017425537109375, + "learning_rate": 9.649999999999999e-07, + "loss": -0.0184, + "num_tokens": 17316728.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9938013553619385, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03653308137214126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07295244422844069, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1387.875, + "completions/mean_terminated_length": 1275.75, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.097024256064016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.092184342409662, + "kl": 0.0030975341796875, + "learning_rate": 9.675e-07, + "loss": -0.0334, + "num_tokens": 17361806.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9008313417434692, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020895333603034128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06998442605962689, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16771890063326086, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1223.125, + "completions/mean_terminated_length": 1130.8333740234375, + "completions/min_length": 326.0, + "completions/min_terminated_length": 326.0, + "epoch": 0.09727431857964491, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.257016196811225, + "kl": 0.001712799072265625, + "learning_rate": 9.7e-07, + "loss": -0.1056, + "num_tokens": 17420784.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8822591304779053, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02804826992765156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04830578223575602, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1215.4375, + "completions/mean_terminated_length": 1149.769287109375, + "completions/min_length": 629.0, + "completions/min_terminated_length": 629.0, + "epoch": 0.09752438109527382, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8876674195472147, + "kl": 0.0022144317626953125, + "learning_rate": 9.725e-07, + "loss": -0.0253, + "num_tokens": 17458519.0, + "reward": 0.0, + "reward_std": 0.8609707355499268, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.000890205888126898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04875525087799635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1146.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 947.75, + "completions/mean_terminated_length": 947.75, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.09777444361090272, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3751785150658438, + "kl": 0.0014820098876953125, + "learning_rate": 9.75e-07, + "loss": -0.0086, + "num_tokens": 17495451.0, + "reward": 0.0, + "reward_std": 1.0464046001434326, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038017810611333844, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05244035683687537, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05561108336107647, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1074.3125, + "completions/mean_terminated_length": 1074.3125, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.09802450612653163, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0170667348650544, + "kl": 0.0015840530395507812, + "learning_rate": 9.775e-07, + "loss": -0.0683, + "num_tokens": 17542720.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7801408171653748, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05931652536713336, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13024681873934726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1197.0, + "completions/max_terminated_length": 1197.0, + "completions/mean_length": 932.0625, + "completions/mean_terminated_length": 932.0625, + "completions/min_length": 660.0, + "completions/min_terminated_length": 660.0, + "epoch": 0.09827456864216054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8549684285941788, + "kl": 0.002742767333984375, + "learning_rate": 9.8e-07, + "loss": -0.031, + "num_tokens": 17570193.0, + "reward": 0.0, + "reward_std": 0.8524289727210999, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3325566490154477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3573511728448711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12765694770084507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1218.125, + "completions/mean_terminated_length": 1177.857177734375, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.09852463115778945, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7455839022134394, + "kl": 0.002155303955078125, + "learning_rate": 9.825e-07, + "loss": 0.0531, + "num_tokens": 17616699.0, + "reward": 0.0, + "reward_std": 0.9136618375778198, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2656786540786118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.38981542290339427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1256.5, + "completions/mean_terminated_length": 1175.3333740234375, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.09877469367341836, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7823705298735697, + "kl": 0.002353668212890625, + "learning_rate": 9.849999999999999e-07, + "loss": -0.0126, + "num_tokens": 17668907.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.44748926162719727, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10223704028345797, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12152454043199322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1079.9375, + "completions/mean_terminated_length": 1079.9375, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.09902475618904726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.302195166632783, + "kl": 0.0023860931396484375, + "learning_rate": 9.875e-07, + "loss": 0.0161, + "num_tokens": 17716306.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.48119497299194336, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.062828158235606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07572207129906613, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 977.625, + "completions/mean_terminated_length": 977.625, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.09927481870467617, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.908543017539533, + "kl": 0.00231170654296875, + "learning_rate": 9.9e-07, + "loss": -0.0369, + "num_tokens": 17777708.0, + "reward": -1.862645149230957e-09, + "reward_std": 1.0572876930236816, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039631929523983346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06067875523113735, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0749073501808141, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1049.0625, + "completions/mean_terminated_length": 1049.0625, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.09952488122030508, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4164953663713455, + "kl": 0.00264739990234375, + "learning_rate": 9.925e-07, + "loss": -0.0022, + "num_tokens": 17818901.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9157768487930298, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042287498985858346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11061801447516822, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1073.5625, + "completions/mean_terminated_length": 1073.5625, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.09977494373593399, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.339255844786321, + "kl": 0.002819061279296875, + "learning_rate": 9.95e-07, + "loss": 0.0344, + "num_tokens": 17862510.0, + "reward": 0.0, + "reward_std": 0.8882852792739868, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07941855514590238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05563103122002795, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 853.9375, + "completions/mean_terminated_length": 853.9375, + "completions/min_length": 511.0, + "completions/min_terminated_length": 511.0, + "epoch": 0.1000250062515629, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.413183575298617, + "kl": 0.003345489501953125, + "learning_rate": 9.975e-07, + "loss": 0.0128, + "num_tokens": 17902029.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.966637372970581, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021911533719654527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1585205788131985, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1157.0, + "completions/max_terminated_length": 1157.0, + "completions/mean_length": 985.4375, + "completions/mean_terminated_length": 985.4375, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.10027506876719179, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.724735844545274, + "kl": 0.0030059814453125, + "learning_rate": 1e-06, + "loss": -0.0441, + "num_tokens": 17945716.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7285774946212769, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0050505658214612375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0758397401359648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1130.1875, + "completions/mean_terminated_length": 1077.357177734375, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.1005251312828207, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7276824777799686, + "kl": 0.0020542144775390625, + "learning_rate": 9.999998285574796e-07, + "loss": 0.03, + "num_tokens": 17987247.0, + "reward": 0.0, + "reward_std": 0.7956888675689697, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07526557916325621, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08164375171725943, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657014, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1074.5, + "completions/mean_terminated_length": 1074.5, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.10077519379844961, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7911779400626777, + "kl": 0.002838134765625, + "learning_rate": 9.99999314230049e-07, + "loss": 0.0254, + "num_tokens": 18034687.0, + "reward": 0.0, + "reward_std": 1.0158910751342773, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17600135422441351, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11918007940380357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1207.125, + "completions/mean_terminated_length": 1139.5384521484375, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.10102525631407852, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.994232500734222, + "kl": 0.002315521240234375, + "learning_rate": 9.999984570181004e-07, + "loss": 0.0124, + "num_tokens": 18092441.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5951977372169495, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020331638975317062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.032831953642798416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1104.0625, + "completions/mean_terminated_length": 1104.0625, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.10127531882970743, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6461935308183646, + "kl": 0.0033416748046875, + "learning_rate": 9.999972569222867e-07, + "loss": 0.0362, + "num_tokens": 18130762.0, + "reward": 0.0, + "reward_std": 0.9406242370605469, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015606644841351769, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07346737918929852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1084.0625, + "completions/mean_terminated_length": 1084.0625, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.10152538134533634, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.525227780827014, + "kl": 0.00336456298828125, + "learning_rate": 9.999957139435225e-07, + "loss": -0.0481, + "num_tokens": 18173043.0, + "reward": 0.0, + "reward_std": 0.9337491393089294, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018321181362742477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034283239249879464, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1213.3125, + "completions/mean_terminated_length": 1147.1539306640625, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.10177544386096524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.045677192987912, + "kl": 0.00289154052734375, + "learning_rate": 9.999938280829832e-07, + "loss": -0.0256, + "num_tokens": 18217544.0, + "reward": 0.0, + "reward_std": 0.7943310737609863, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05942170915915708, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060338125637511335, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1093.625, + "completions/mean_terminated_length": 1093.625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.10202550637659415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.865226434622248, + "kl": 0.003322601318359375, + "learning_rate": 9.99991599342106e-07, + "loss": -0.0518, + "num_tokens": 18271178.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0633788108825684, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.037091154686884543, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0737366178013423, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 833.8125, + "completions/mean_terminated_length": 833.8125, + "completions/min_length": 520.0, + "completions/min_terminated_length": 520.0, + "epoch": 0.10227556889222306, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.390102880431747, + "kl": 0.0022296905517578125, + "learning_rate": 9.999890277225893e-07, + "loss": -0.0064, + "num_tokens": 18298231.0, + "reward": 0.0, + "reward_std": 0.5105856657028198, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.056842702949623264, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0682260947942915, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1160.0625, + "completions/mean_terminated_length": 1081.615478515625, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.10252563140785197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.722834182457097, + "kl": 0.001842498779296875, + "learning_rate": 9.999861132263923e-07, + "loss": -0.0132, + "num_tokens": 18345128.0, + "reward": 0.0, + "reward_std": 0.5252693891525269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04003883347594085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05606600804562579, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1123.375, + "completions/mean_terminated_length": 1098.2667236328125, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.10277569392348088, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4211654155324407, + "kl": 0.003276824951171875, + "learning_rate": 9.999828558557357e-07, + "loss": -0.0016, + "num_tokens": 18389662.0, + "reward": 0.0, + "reward_std": 0.7185800075531006, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039960047296296056, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05037813653940092, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 987.875, + "completions/mean_terminated_length": 987.875, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.10302575643910977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.498388199866208, + "kl": 0.0017108917236328125, + "learning_rate": 9.999792556131017e-07, + "loss": -0.0425, + "num_tokens": 18432412.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7068929672241211, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15896394127568017, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11915743132875316, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13977495139343474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1045.75, + "completions/mean_terminated_length": 1015.4667358398438, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.10327581895473868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9448163454419025, + "kl": 0.002292633056640625, + "learning_rate": 9.999753125012334e-07, + "loss": -0.0364, + "num_tokens": 18487168.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9043693542480469, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03969252923642197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1072244579588454, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1283.5, + "completions/mean_terminated_length": 1211.3333740234375, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.10352588147036759, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9870320411030757, + "kl": 0.002712249755859375, + "learning_rate": 9.999710265231357e-07, + "loss": -0.0267, + "num_tokens": 18540272.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5313631296157837, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06365460682333254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08305805103378404, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 767.0, + "completions/mean_length": 1056.6875, + "completions/mean_terminated_length": 613.375, + "completions/min_length": 530.0, + "completions/min_terminated_length": 530.0, + "epoch": 0.1037759439859965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.355531637600689, + "kl": 0.003208160400390625, + "learning_rate": 9.999663976820738e-07, + "loss": -0.0203, + "num_tokens": 18593115.0, + "reward": 0.0, + "reward_std": 0.3969656229019165, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02996520609378857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21551930730541666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13770607453181927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1173.875, + "completions/mean_terminated_length": 1127.2857666015625, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.10402600650162541, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1844051680157164, + "kl": 0.00296783447265625, + "learning_rate": 9.999614259815751e-07, + "loss": 0.0003, + "num_tokens": 18628481.0, + "reward": 0.0, + "reward_std": 0.99803626537323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005688443190054581, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01228474630921147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1256.0625, + "completions/mean_terminated_length": 1145.181884765625, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.10427606901725431, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.578448844736388, + "kl": 0.001501321792602539, + "learning_rate": 9.99956111425428e-07, + "loss": 0.0098, + "num_tokens": 18678290.0, + "reward": 0.0, + "reward_std": 1.0209581851959229, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08104310983591446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12642668929548362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1161.9375, + "completions/mean_terminated_length": 1139.4000244140625, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.10452613153288322, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.991223945535393, + "kl": 0.00232696533203125, + "learning_rate": 9.999504540176816e-07, + "loss": -0.0254, + "num_tokens": 18728745.0, + "reward": 0.0, + "reward_std": 0.42453551292419434, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0756995310355624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11575388116466144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1164.0625, + "completions/mean_terminated_length": 1164.0625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.10477619404851213, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8809513015563413, + "kl": 0.0007367134094238281, + "learning_rate": 9.999444537626468e-07, + "loss": -0.0526, + "num_tokens": 18770250.0, + "reward": 1.3969838619232178e-09, + "reward_std": 1.0341196060180664, + "rewards/wordcountpos_reward_GEOBench/mean": 1.3969838619232178e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032196729970893075, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0736348327431488, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1257.0, + "completions/mean_length": 1060.4375, + "completions/mean_terminated_length": 997.6428833007812, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.10502625656414104, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.108247194177743, + "kl": 0.00617218017578125, + "learning_rate": 9.999381106648958e-07, + "loss": -0.049, + "num_tokens": 18817881.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0104570388793945, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0768872503814593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08851496877124741, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 932.0, + "completions/max_terminated_length": 932.0, + "completions/mean_length": 833.9375, + "completions/mean_terminated_length": 833.9375, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.10527631907976995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4116836389041296, + "kl": 0.00226593017578125, + "learning_rate": 9.999314247292617e-07, + "loss": -0.0253, + "num_tokens": 18849384.0, + "reward": 0.0, + "reward_std": 0.4989623725414276, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08478290474479218, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.116369236787899, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19851299052557597, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1029.375, + "completions/mean_terminated_length": 1029.375, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.10552638159539884, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6621110182740733, + "kl": 0.0021038055419921875, + "learning_rate": 9.999243959608388e-07, + "loss": 0.0197, + "num_tokens": 18887902.0, + "reward": 0.0, + "reward_std": 0.9978680610656738, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028123753128822683, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09758655833367338, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 998.6875, + "completions/mean_terminated_length": 998.6875, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.10577644411102775, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.190237221912177, + "kl": 0.001277923583984375, + "learning_rate": 9.99917024364983e-07, + "loss": 0.0147, + "num_tokens": 18923425.0, + "reward": 0.0, + "reward_std": 0.767393171787262, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23017046475530423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2996837752408584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1202.125, + "completions/mean_terminated_length": 1182.2667236328125, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.10602650662665666, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1305329101433403, + "kl": 0.00272369384765625, + "learning_rate": 9.999093099473112e-07, + "loss": -0.016, + "num_tokens": 18977939.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0331313610076904, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10159333484556354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0926185119453781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1021.0, + "completions/mean_terminated_length": 989.0667114257812, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.10627656914228557, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.939653809492422, + "kl": 0.001674652099609375, + "learning_rate": 9.999012527137013e-07, + "loss": -0.0577, + "num_tokens": 19022963.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0142796039581299, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06406590337502378, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21208067337773673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1257.5, + "completions/mean_terminated_length": 1147.272705078125, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.10652663165791448, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9383358252662382, + "kl": 0.0012273788452148438, + "learning_rate": 9.99892852670293e-07, + "loss": 0.0069, + "num_tokens": 19065731.0, + "reward": 0.0, + "reward_std": 0.33148959279060364, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13359326395191445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18185828183219813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 973.875, + "completions/mean_terminated_length": 973.875, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.10677669417354338, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.882179484241521, + "kl": 0.0021805763244628906, + "learning_rate": 9.998841098234867e-07, + "loss": 0.0058, + "num_tokens": 19098729.0, + "reward": 0.0, + "reward_std": 0.819907546043396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07985359751963989, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12582458788884457, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1006.0, + "completions/max_terminated_length": 1006.0, + "completions/mean_length": 624.125, + "completions/mean_terminated_length": 624.125, + "completions/min_length": 458.0, + "completions/min_terminated_length": 458.0, + "epoch": 0.10702675668917229, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7893163755387285, + "kl": 0.002437591552734375, + "learning_rate": 9.998750241799439e-07, + "loss": -0.0582, + "num_tokens": 19136739.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9469300508499146, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03564841377225611, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05751652750076668, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081411, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1218.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 1021.0625, + "completions/mean_terminated_length": 1021.0625, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.1072768192048012, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.088008636348857, + "kl": 0.002765655517578125, + "learning_rate": 9.99865595746588e-07, + "loss": -0.0028, + "num_tokens": 19185804.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9508347511291504, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07542284906966032, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09046034398753702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 1003.1875, + "completions/mean_terminated_length": 970.0667114257812, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.10752688172043011, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.733659925239738, + "kl": 0.0022869110107421875, + "learning_rate": 9.998558245306028e-07, + "loss": 0.0069, + "num_tokens": 19218759.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8390051126480103, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0030669333259065556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026250321320539838, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1240.0, + "completions/mean_length": 1318.375, + "completions/mean_terminated_length": 1136.75, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.10777694423605902, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.462642584335649, + "kl": 0.0019931793212890625, + "learning_rate": 9.99845710539434e-07, + "loss": -0.0168, + "num_tokens": 19274333.0, + "reward": 0.0, + "reward_std": 0.45142415165901184, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09373337865330193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2023754484722259, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1310.6875, + "completions/mean_terminated_length": 1121.375, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.10802700675168792, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.901962645922736, + "kl": 0.002777099609375, + "learning_rate": 9.998352537807878e-07, + "loss": -0.0085, + "num_tokens": 19327312.0, + "reward": 0.0, + "reward_std": 1.0494855642318726, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07243884051242104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10806614693466754, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1118.0625, + "completions/mean_terminated_length": 1092.60009765625, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.10827706926731683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3436900023558014, + "kl": 0.0014190673828125, + "learning_rate": 9.99824454262632e-07, + "loss": 0.0036, + "num_tokens": 19363441.0, + "reward": 0.0, + "reward_std": 0.7307407855987549, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01479998632782259, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0320783502305435, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1043.875, + "completions/mean_terminated_length": 1043.875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.10852713178294573, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6729031474857097, + "kl": 0.0040435791015625, + "learning_rate": 9.998133119931953e-07, + "loss": 0.0042, + "num_tokens": 19406103.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0399914979934692, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13429817065563548, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08830621961230006, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1267.375, + "completions/mean_terminated_length": 1161.6363525390625, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.10877719429857464, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.707714044735896, + "kl": 0.002033233642578125, + "learning_rate": 9.998018269809681e-07, + "loss": 0.022, + "num_tokens": 19453333.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7753083109855652, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012545183451018046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05436510655692416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965646, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1118.5, + "completions/mean_terminated_length": 1118.5, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.10902725681420355, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.608155064392364, + "kl": 0.003948211669921875, + "learning_rate": 9.997899992347014e-07, + "loss": 0.008, + "num_tokens": 19499477.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9484235048294067, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021175051680327142, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07533380845870233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1131.0, + "completions/max_terminated_length": 1131.0, + "completions/mean_length": 960.75, + "completions/mean_terminated_length": 960.75, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.10927731932983246, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3320430362817322, + "kl": 0.0004271268844604492, + "learning_rate": 9.997778287634075e-07, + "loss": -0.0112, + "num_tokens": 19534569.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5456872582435608, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06857088093680126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1002632209563821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 675.8125, + "completions/mean_terminated_length": 675.8125, + "completions/min_length": 443.0, + "completions/min_terminated_length": 443.0, + "epoch": 0.10952738184546136, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.432502217851674, + "kl": 0.00177764892578125, + "learning_rate": 9.9976531557636e-07, + "loss": -0.0848, + "num_tokens": 19562766.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4597274959087372, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06626606198922527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10849145648870212, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1166.875, + "completions/mean_terminated_length": 1144.666748046875, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.10977744436109027, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5561820312234373, + "kl": 0.0020694732666015625, + "learning_rate": 9.997524596830937e-07, + "loss": 0.0076, + "num_tokens": 19606804.0, + "reward": 0.0, + "reward_std": 0.9311432838439941, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0461161784473087, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08078332448597508, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 888.9375, + "completions/mean_terminated_length": 888.9375, + "completions/min_length": 551.0, + "completions/min_terminated_length": 551.0, + "epoch": 0.11002750687671918, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4428606917330877, + "kl": 0.0025787353515625, + "learning_rate": 9.99739261093404e-07, + "loss": 0.0578, + "num_tokens": 19653971.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6772654056549072, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014718382765368524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05430446050826683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1010.375, + "completions/mean_terminated_length": 1010.375, + "completions/min_length": 265.0, + "completions/min_terminated_length": 265.0, + "epoch": 0.11027756939234809, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.421262768233403, + "kl": 0.00252532958984375, + "learning_rate": 9.997257198173478e-07, + "loss": -0.1349, + "num_tokens": 19698233.0, + "reward": 0.0, + "reward_std": 0.5146583318710327, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15665923577605484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18434934950371085, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.103905227473387, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1251.125, + "completions/mean_terminated_length": 1215.571533203125, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.110527631907977, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1856281829867146, + "kl": 0.003414154052734375, + "learning_rate": 9.997118358652435e-07, + "loss": 0.0223, + "num_tokens": 19751123.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6393890380859375, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08929015801608367, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05682212966081955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1229.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 1014.875, + "completions/mean_terminated_length": 1014.875, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.1107776944236059, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0415800832909365, + "kl": 0.002475738525390625, + "learning_rate": 9.996976092476697e-07, + "loss": -0.0135, + "num_tokens": 19791033.0, + "reward": 0.0, + "reward_std": 0.6015963554382324, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031860889835528275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07588870055207932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 963.9375, + "completions/mean_terminated_length": 887.357177734375, + "completions/min_length": 551.0, + "completions/min_terminated_length": 551.0, + "epoch": 0.1110277569392348, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2846887152557755, + "kl": 0.002300262451171875, + "learning_rate": 9.996830399754672e-07, + "loss": -0.0803, + "num_tokens": 19832216.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6499773263931274, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03508424890726501, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08826519279855674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 838.625, + "completions/mean_terminated_length": 838.625, + "completions/min_length": 489.0, + "completions/min_terminated_length": 489.0, + "epoch": 0.11127781945486372, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.358112378829443, + "kl": 0.003875732421875, + "learning_rate": 9.996681280597367e-07, + "loss": -0.0545, + "num_tokens": 19871114.0, + "reward": 0.0, + "reward_std": 0.9333539009094238, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034569465471451344, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05898810520366185, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1140.25, + "completions/mean_terminated_length": 1116.2667236328125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.11152788197049263, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4052693683326045, + "kl": 0.0030059814453125, + "learning_rate": 9.996528735118408e-07, + "loss": -0.0271, + "num_tokens": 19905910.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6572015285491943, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1050.6875, + "completions/mean_terminated_length": 1050.6875, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.11177794448612154, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5871047798473796, + "kl": 0.001804351806640625, + "learning_rate": 9.996372763434033e-07, + "loss": -0.051, + "num_tokens": 19946857.0, + "reward": 0.0, + "reward_std": 1.0336170196533203, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014050000219165545, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12616233741013758, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1058.0, + "completions/mean_terminated_length": 1028.533447265625, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.11202800700175043, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6403152715878204, + "kl": 0.0015974044799804688, + "learning_rate": 9.996213365663083e-07, + "loss": 0.0089, + "num_tokens": 19997601.0, + "reward": 0.0, + "reward_std": 1.0140193700790405, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05025316252943588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0864332277190463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1071.8125, + "completions/mean_terminated_length": 1043.2667236328125, + "completions/min_length": 617.0, + "completions/min_terminated_length": 617.0, + "epoch": 0.11227806951737934, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.192424559488774, + "kl": 0.00299072265625, + "learning_rate": 9.996050541927015e-07, + "loss": -0.0928, + "num_tokens": 20039278.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9971123933792114, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1215.5625, + "completions/mean_terminated_length": 1215.5625, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.11252813203300825, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3534969665119667, + "kl": 0.001804351806640625, + "learning_rate": 9.995884292349896e-07, + "loss": -0.0482, + "num_tokens": 20094447.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0348457098007202, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035441112478051325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05222290356274493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1211.875, + "completions/mean_terminated_length": 1192.666748046875, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.11277819454863716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1192801318331944, + "kl": 0.0031890869140625, + "learning_rate": 9.9957146170584e-07, + "loss": -0.0514, + "num_tokens": 20139717.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0499701499938965, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0218748547193819, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07411800447009037, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970786, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1388.6875, + "completions/mean_terminated_length": 1321.9000244140625, + "completions/min_length": 1234.0, + "completions/min_terminated_length": 1234.0, + "epoch": 0.11302825706426607, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8842447982375146, + "kl": 0.0017223358154296875, + "learning_rate": 9.995541516181817e-07, + "loss": 0.0038, + "num_tokens": 20200120.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9975371360778809, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02835449972266134, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11327596561598677, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1158.625, + "completions/mean_terminated_length": 1158.625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.11327831957989497, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8942846292452775, + "kl": 0.00240325927734375, + "learning_rate": 9.995364989852045e-07, + "loss": 0.0061, + "num_tokens": 20231682.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6276419162750244, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023763482921339327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06469658361971548, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1138.4375, + "completions/mean_terminated_length": 1138.4375, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.11352838209552388, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.42215737600918, + "kl": 0.0019969940185546875, + "learning_rate": 9.995185038203588e-07, + "loss": 0.0307, + "num_tokens": 20274425.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.677852988243103, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11064222034531476, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10075149286752551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0642621944040945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1296.8125, + "completions/mean_terminated_length": 1093.625, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.11377844461115279, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.340218186114413, + "kl": 0.001773834228515625, + "learning_rate": 9.995001661373567e-07, + "loss": -0.0233, + "num_tokens": 20323086.0, + "reward": 0.0, + "reward_std": 0.921933650970459, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046500576317833185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11543840603923833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337806, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1237.6875, + "completions/mean_terminated_length": 1237.6875, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.1140285071267817, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7290986257196264, + "kl": 0.0009241104125976562, + "learning_rate": 9.994814859501702e-07, + "loss": -0.0152, + "num_tokens": 20361353.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0685139894485474, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12379016364228054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22315341141498754, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1297.75, + "completions/mean_terminated_length": 1176.4000244140625, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.11427856964241061, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1838945586656653, + "kl": 0.0019779205322265625, + "learning_rate": 9.994624632730337e-07, + "loss": -0.0486, + "num_tokens": 20402709.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0498988628387451, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008228145591395864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026428948951996257, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1241.75, + "completions/mean_terminated_length": 1155.666748046875, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.1145286321580395, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9731348531495456, + "kl": 0.0012722015380859375, + "learning_rate": 9.994430981204415e-07, + "loss": 0.0195, + "num_tokens": 20461617.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9915875196456909, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12239527613235844, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1135764773901167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1235.0, + "completions/mean_terminated_length": 1076.0, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.11477869467366841, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.256914476787831, + "kl": 0.0013742446899414062, + "learning_rate": 9.994233905071494e-07, + "loss": 0.002, + "num_tokens": 20513225.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8589866161346436, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013776669800355704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10473166431237643, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1075.5, + "completions/mean_terminated_length": 1047.2000732421875, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.11502875718929732, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.522783622147947, + "kl": 0.0038299560546875, + "learning_rate": 9.994033404481736e-07, + "loss": -0.0179, + "num_tokens": 20566913.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0438793897628784, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08531770870459285, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0975366130628703, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 945.0, + "completions/mean_terminated_length": 945.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.11527881970492623, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7953307125991937, + "kl": 0.0023134946823120117, + "learning_rate": 9.993829479587922e-07, + "loss": 0.0205, + "num_tokens": 20604233.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9744645953178406, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024659252673450895, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12690740467423736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 871.75, + "completions/mean_terminated_length": 871.75, + "completions/min_length": 524.0, + "completions/min_terminated_length": 524.0, + "epoch": 0.11552888222055514, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6327616525094073, + "kl": 0.002971649169921875, + "learning_rate": 9.993622130545427e-07, + "loss": -0.0085, + "num_tokens": 20633925.0, + "reward": 0.0, + "reward_std": 1.0286928415298462, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1119.0, + "completions/mean_terminated_length": 1119.0, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.11577894473618405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.098764713869499, + "kl": 0.0023956298828125, + "learning_rate": 9.993411357512252e-07, + "loss": 0.023, + "num_tokens": 20679797.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9763658046722412, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0521574071434935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06858397129544172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1199.875, + "completions/mean_terminated_length": 1199.875, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.11602900725181295, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4156287648714017, + "kl": 0.004608154296875, + "learning_rate": 9.993197160648997e-07, + "loss": 0.0078, + "num_tokens": 20732067.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9783685207366943, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03805909103217653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052567012149136144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.20143007241738775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1225.6875, + "completions/mean_terminated_length": 1134.25, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.11627906976744186, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1402200576985315, + "kl": 0.0015392303466796875, + "learning_rate": 9.992979540118872e-07, + "loss": 0.0322, + "num_tokens": 20782278.0, + "reward": 0.0, + "reward_std": 0.8326923847198486, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07597523580085179, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17027876724317487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1102.0, + "completions/mean_length": 1206.5625, + "completions/mean_terminated_length": 913.125, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.11652913228307077, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.959015715764203, + "kl": 0.002864837646484375, + "learning_rate": 9.992758496087696e-07, + "loss": -0.0487, + "num_tokens": 20836047.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6986961364746094, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03154831889825261, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08064359430377101, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 973.3125, + "completions/mean_terminated_length": 973.3125, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.11677919479869968, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0464795250895516, + "kl": 0.0022182464599609375, + "learning_rate": 9.992534028723898e-07, + "loss": -0.0174, + "num_tokens": 20877420.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.992424726486206, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07067603541107653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08799557120731556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1271.0, + "completions/max_terminated_length": 1271.0, + "completions/mean_length": 935.75, + "completions/mean_terminated_length": 935.75, + "completions/min_length": 504.0, + "completions/min_terminated_length": 504.0, + "epoch": 0.11702925731432859, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7691912076955214, + "kl": 0.0028228759765625, + "learning_rate": 9.992306138198513e-07, + "loss": 0.0496, + "num_tokens": 20923184.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9973411560058594, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13090789971284916, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22862759952216002, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0582141639885766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1381.1875, + "completions/mean_terminated_length": 1309.9000244140625, + "completions/min_length": 1247.0, + "completions/min_terminated_length": 1247.0, + "epoch": 0.11727931982995748, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5466078804762555, + "kl": 0.0011043548583984375, + "learning_rate": 9.99207482468519e-07, + "loss": 0.0173, + "num_tokens": 20968603.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0604538917541504, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08546062564710599, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08408263048054085, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 903.5625, + "completions/mean_terminated_length": 903.5625, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.1175293823455864, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6018482945375676, + "kl": 0.003017425537109375, + "learning_rate": 9.99184008836018e-07, + "loss": -0.0311, + "num_tokens": 20996740.0, + "reward": 0.0, + "reward_std": 0.9836671352386475, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13207441649643256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2758550087044649, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1218.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 868.0625, + "completions/mean_terminated_length": 868.0625, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.1177794448612153, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8245745183863713, + "kl": 0.00321197509765625, + "learning_rate": 9.991601929402342e-07, + "loss": -0.0527, + "num_tokens": 21024037.0, + "reward": 0.0, + "reward_std": 1.0322167873382568, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02317361696418127, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04764396597728011, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1351.1875, + "completions/mean_terminated_length": 1235.4444580078125, + "completions/min_length": 1195.0, + "completions/min_terminated_length": 1195.0, + "epoch": 0.11802950737684421, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6511199290339662, + "kl": 0.001110076904296875, + "learning_rate": 9.991360347993149e-07, + "loss": -0.0068, + "num_tokens": 21082600.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0523817539215088, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07175716904242216, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1317744296853896, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1308.25, + "completions/mean_terminated_length": 1193.2000732421875, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.11827956989247312, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.287983533210016, + "kl": 0.003437042236328125, + "learning_rate": 9.991115344316677e-07, + "loss": -0.0073, + "num_tokens": 21139236.0, + "reward": 0.0, + "reward_std": 0.8997383713722229, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10362979506116665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10137972136869466, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 951.75, + "completions/mean_terminated_length": 951.75, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.11852963240810202, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8254207765028245, + "kl": 0.0020771026611328125, + "learning_rate": 9.990866918559607e-07, + "loss": -0.0497, + "num_tokens": 21173160.0, + "reward": 0.0, + "reward_std": 0.8687636852264404, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07434640960796121, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12768080367052, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1285.0625, + "completions/mean_terminated_length": 1270.7333984375, + "completions/min_length": 1108.0, + "completions/min_terminated_length": 1108.0, + "epoch": 0.11877969492373093, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.320845469611608, + "kl": 0.0017871856689453125, + "learning_rate": 9.990615070911237e-07, + "loss": -0.0151, + "num_tokens": 21226849.0, + "reward": 0.0, + "reward_std": 0.9385799169540405, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.039590954442305766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07394145252434572, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1318.25, + "completions/mean_terminated_length": 1257.666748046875, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.11902975743935984, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1645881705353145, + "kl": 0.003589630126953125, + "learning_rate": 9.990359801563461e-07, + "loss": 0.0209, + "num_tokens": 21277285.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9868928790092468, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01756951643080076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05511175721082276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1228.75, + "completions/mean_terminated_length": 1105.45458984375, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.11927981995498875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.273800818119665, + "kl": 0.00366973876953125, + "learning_rate": 9.990101110710792e-07, + "loss": -0.0025, + "num_tokens": 21324601.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0340567827224731, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023006925094900196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07948267411527786, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1085.375, + "completions/mean_terminated_length": 1085.375, + "completions/min_length": 437.0, + "completions/min_terminated_length": 437.0, + "epoch": 0.11952988247061766, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.159904868004268, + "kl": 0.00307464599609375, + "learning_rate": 9.989838998550336e-07, + "loss": -0.0371, + "num_tokens": 21381607.0, + "reward": 0.0, + "reward_std": 0.84410560131073, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011729034755839371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023917414730801848, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1023.125, + "completions/mean_terminated_length": 1023.125, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.11977994498624656, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6870476286960314, + "kl": 0.0016269683837890625, + "learning_rate": 9.989573465281822e-07, + "loss": -0.0254, + "num_tokens": 21430721.0, + "reward": 0.0, + "reward_std": 1.000291109085083, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03510427667199128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07292794343591792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1315.875, + "completions/mean_terminated_length": 1254.5, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.12003000750187547, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.242448878863798, + "kl": 0.003253936767578125, + "learning_rate": 9.98930451110757e-07, + "loss": -0.0254, + "num_tokens": 21475375.0, + "reward": 0.0, + "reward_std": 0.9548476338386536, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0052787857579421876, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0310737911549311, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1126.9375, + "completions/mean_terminated_length": 1102.0667724609375, + "completions/min_length": 611.0, + "completions/min_terminated_length": 611.0, + "epoch": 0.12028007001750438, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.340903357945458, + "kl": 0.003391265869140625, + "learning_rate": 9.989032136232521e-07, + "loss": 0.0068, + "num_tokens": 21517902.0, + "reward": 0.0, + "reward_std": 1.0344252586364746, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04493716044625756, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0652600439078487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1206.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1037.4375, + "completions/mean_terminated_length": 1037.4375, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.12053013253313329, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.584090150613325, + "kl": 0.0022182464599609375, + "learning_rate": 9.988756340864209e-07, + "loss": -0.0448, + "num_tokens": 21560501.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.49676966667175293, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00797031256979978, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.019830842789940344, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1126.25, + "completions/mean_terminated_length": 835.5555419921875, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.1207801950487622, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4305009468464422, + "kl": 0.0009260177612304688, + "learning_rate": 9.988477125212786e-07, + "loss": 0.0019, + "num_tokens": 21604121.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9534294605255127, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06000416032722413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07663491070844125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1064.4375, + "completions/mean_terminated_length": 1002.21435546875, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.12103025756439109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8883662385395725, + "kl": 0.0027618408203125, + "learning_rate": 9.988194489491003e-07, + "loss": -0.0124, + "num_tokens": 21642600.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9321235418319702, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036588304676980966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09692973022454926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1473.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1315.3125, + "completions/mean_terminated_length": 1315.3125, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.12128032008002, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.041196430760287, + "kl": 0.002773284912109375, + "learning_rate": 9.98790843391422e-07, + "loss": 0.0094, + "num_tokens": 21689669.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9781796932220459, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10509780313992463, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1842309112254362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1017.3125, + "completions/mean_terminated_length": 1017.3125, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.12153038259564891, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.62830202748193, + "kl": 0.002079010009765625, + "learning_rate": 9.9876189587004e-07, + "loss": 0.0177, + "num_tokens": 21726658.0, + "reward": 0.0, + "reward_std": 0.9242294430732727, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03719860332525413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11705753489577597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.055611083361076466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1199.8125, + "completions/mean_terminated_length": 1063.3636474609375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.12178044511127782, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6831111374895373, + "kl": 0.0023040771484375, + "learning_rate": 9.987326064070114e-07, + "loss": 0.0024, + "num_tokens": 21775183.0, + "reward": 0.0, + "reward_std": 0.6643351316452026, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04179788742030815, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043910698736612, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1121.0625, + "completions/mean_terminated_length": 1066.9285888671875, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.12203050762690673, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8753954991396626, + "kl": 0.00237274169921875, + "learning_rate": 9.987029750246541e-07, + "loss": -0.0691, + "num_tokens": 21830416.0, + "reward": 0.0, + "reward_std": 1.0024687051773071, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0009582334059005659, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09895978940420416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1133.25, + "completions/mean_terminated_length": 1011.0, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.12228057014253563, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0167278788087812, + "kl": 0.003093719482421875, + "learning_rate": 9.986730017455458e-07, + "loss": -0.0093, + "num_tokens": 21881252.0, + "reward": 0.0, + "reward_std": 0.9853301048278809, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010290320834108732, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046679142005282184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818892, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1001.125, + "completions/mean_terminated_length": 1001.125, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.12253063265816454, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.088715362605181, + "kl": 0.0022401809692382812, + "learning_rate": 9.986426865925253e-07, + "loss": -0.045, + "num_tokens": 21911254.0, + "reward": -1.6763806343078613e-08, + "reward_std": 1.0661542415618896, + "rewards/wordcountpos_reward_GEOBench/mean": -1.6763806343078613e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010519770549957857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048725110206193925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1172.8125, + "completions/mean_terminated_length": 1151.0, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.12278069517379345, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.7241694912069947, + "kl": 0.0003293156623840332, + "learning_rate": 9.986120295886917e-07, + "loss": -0.0236, + "num_tokens": 21947755.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.053109049797058, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04817911761437584, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04428007753822325, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1177.0, + "completions/mean_length": 1074.6875, + "completions/mean_terminated_length": 1046.3333740234375, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.12303075768942236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0493137373676555, + "kl": 0.0012140274047851562, + "learning_rate": 9.985810307574049e-07, + "loss": -0.004, + "num_tokens": 21988734.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0513246059417725, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08150869427792898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06156310448369525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1229.9375, + "completions/mean_terminated_length": 1229.9375, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.12328082020505127, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4963236684971726, + "kl": 0.0020904541015625, + "learning_rate": 9.985496901222846e-07, + "loss": -0.0213, + "num_tokens": 22038557.0, + "reward": 0.0, + "reward_std": 0.8496633172035217, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.059821570128304205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047675615411133083, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1185.625, + "completions/mean_terminated_length": 1164.666748046875, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.12353088272068018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5354953310050905, + "kl": 0.002559661865234375, + "learning_rate": 9.985180077072116e-07, + "loss": -0.0122, + "num_tokens": 22087807.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8560189604759216, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06475012131003337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15928608822134419, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1153.0, + "completions/mean_terminated_length": 1129.86669921875, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.12378094523630907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.434220587186112, + "kl": 0.003879547119140625, + "learning_rate": 9.984859835363266e-07, + "loss": 0.0031, + "num_tokens": 22135871.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9174438118934631, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06368674895204908, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05204661615710918, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1035.375, + "completions/mean_terminated_length": 1004.4000244140625, + "completions/min_length": 513.0, + "completions/min_terminated_length": 513.0, + "epoch": 0.12403100775193798, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9844338618432773, + "kl": 0.002262115478515625, + "learning_rate": 9.984536176340313e-07, + "loss": -0.0625, + "num_tokens": 22176837.0, + "reward": 0.0, + "reward_std": 0.8271920680999756, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0029941811247888427, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0937112567467378, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 948.75, + "completions/mean_terminated_length": 948.75, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.12428107026756689, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1874592548828022, + "kl": 0.00036537647247314453, + "learning_rate": 9.98420910024987e-07, + "loss": -0.0638, + "num_tokens": 22213857.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9812844395637512, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01936975440575858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07884457022292338, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1139.0, + "completions/max_terminated_length": 1139.0, + "completions/mean_length": 1024.625, + "completions/mean_terminated_length": 1024.625, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.1245311327831958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.673312681538535, + "kl": 0.0021686553955078125, + "learning_rate": 9.98387860734116e-07, + "loss": 0.0002, + "num_tokens": 22248755.0, + "reward": 0.0, + "reward_std": 1.017157793045044, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06500015913534689, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05504515590670135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 990.6875, + "completions/mean_terminated_length": 990.6875, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.12478119529882471, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6827380430596595, + "kl": 0.003887176513671875, + "learning_rate": 9.98354469786601e-07, + "loss": 0.0069, + "num_tokens": 22287078.0, + "reward": 0.0, + "reward_std": 0.7481107711791992, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009009967998796074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08031527751818927, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1038.0, + "completions/max_terminated_length": 1038.0, + "completions/mean_length": 875.625, + "completions/mean_terminated_length": 875.625, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.12503125781445362, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6238360585797826, + "kl": 0.00183868408203125, + "learning_rate": 9.983207372078845e-07, + "loss": -0.0051, + "num_tokens": 22328712.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.875469982624054, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016216950265234224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03120896024539966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1278.0, + "completions/mean_length": 1038.25, + "completions/mean_terminated_length": 1007.4667358398438, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.12528132033008252, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.446619593400502, + "kl": 0.00396728515625, + "learning_rate": 9.982866630236697e-07, + "loss": 0.0098, + "num_tokens": 22363036.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8586524128913879, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05128892976787998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08787624571297283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1011.8125, + "completions/mean_terminated_length": 942.0714721679688, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.12553138284571141, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9969698621428376, + "kl": 0.0039577484130859375, + "learning_rate": 9.9825224725992e-07, + "loss": 0.0176, + "num_tokens": 22414761.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.050168752670288, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.055633099309451345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12509359252613836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1185.9375, + "completions/mean_terminated_length": 941.6666870117188, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.12578144536134034, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.936346276135685, + "kl": 0.00298309326171875, + "learning_rate": 9.98217489942859e-07, + "loss": -0.0353, + "num_tokens": 22465528.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8285526633262634, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.059780884220592016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05934168166117334, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1129.3125, + "completions/mean_terminated_length": 1129.3125, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.12603150787696923, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.490297673958819, + "kl": 0.004146575927734375, + "learning_rate": 9.981823910989707e-07, + "loss": 0.0276, + "num_tokens": 22511045.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0241107940673828, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0004771630881000941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07373421257474412, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1075.0, + "completions/max_terminated_length": 1075.0, + "completions/mean_length": 815.3125, + "completions/mean_terminated_length": 815.3125, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.12628157039259816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.987503385480096, + "kl": 0.0020084381103515625, + "learning_rate": 9.981469507549993e-07, + "loss": 0.012, + "num_tokens": 22541874.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9670090675354004, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058355513487687805, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08749553213725886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1315.6875, + "completions/mean_terminated_length": 1172.3333740234375, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.12653163290822705, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0525068975643253, + "kl": 0.001758575439453125, + "learning_rate": 9.98111168937949e-07, + "loss": -0.0064, + "num_tokens": 22589189.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4391608238220215, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10262849488465975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08466837688062771, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337807, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 961.625, + "completions/mean_terminated_length": 925.7333984375, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.12678169542385595, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8191247439809675, + "kl": 0.00437164306640625, + "learning_rate": 9.980750456750844e-07, + "loss": -0.0353, + "num_tokens": 22639223.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9226388931274414, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015802040451202617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03594183797994165, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 1247.6875, + "completions/mean_terminated_length": 1051.4444580078125, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.12703175793948487, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.169140233792912, + "kl": 0.0013804435729980469, + "learning_rate": 9.980385809939303e-07, + "loss": 0.0172, + "num_tokens": 22678690.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0190508365631104, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03765713971949531, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045767675164769504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1065.4375, + "completions/mean_terminated_length": 1065.4375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.12728182045511377, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9577820908401864, + "kl": 0.003505706787109375, + "learning_rate": 9.980017749222716e-07, + "loss": -0.0311, + "num_tokens": 22721145.0, + "reward": 0.0, + "reward_std": 0.729495644569397, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006549463528345031, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26740213041989186, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1129.8125, + "completions/mean_terminated_length": 1105.1334228515625, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.1275318829707427, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.934818872828042, + "kl": 0.002841949462890625, + "learning_rate": 9.979646274881533e-07, + "loss": 0.0037, + "num_tokens": 22763286.0, + "reward": 4.0978193283081055e-08, + "reward_std": 1.049876093864441, + "rewards/wordcountpos_reward_GEOBench/mean": 4.0978193283081055e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002883654723764544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02400884991093857, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 951.8125, + "completions/mean_terminated_length": 873.5000610351562, + "completions/min_length": 509.0, + "completions/min_terminated_length": 509.0, + "epoch": 0.1277819454863716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9969347536544477, + "kl": 0.004425048828125, + "learning_rate": 9.979271387198804e-07, + "loss": -0.1299, + "num_tokens": 22803139.0, + "reward": 0.0, + "reward_std": 0.7554850578308105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016258869321923754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026016024486811017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1154.9375, + "completions/mean_terminated_length": 1154.9375, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.1280320080020005, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7213171416255957, + "kl": 0.004657745361328125, + "learning_rate": 9.978893086460182e-07, + "loss": -0.0424, + "num_tokens": 22842122.0, + "reward": 0.0, + "reward_std": 1.019124150276184, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0382330215838651, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0663981384597086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 967.375, + "completions/mean_terminated_length": 967.375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.1282820705176294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8491000323662288, + "kl": 0.0045166015625, + "learning_rate": 9.97851137295392e-07, + "loss": 0.0024, + "num_tokens": 22892592.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0557444095611572, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15006084794012126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12454623335787925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804345, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1296.5, + "completions/mean_terminated_length": 1228.666748046875, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.1285321330332583, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8379617286300256, + "kl": 0.002964019775390625, + "learning_rate": 9.978126246970868e-07, + "loss": -0.024, + "num_tokens": 22946584.0, + "reward": 0.0, + "reward_std": 0.5564714670181274, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12037964854067126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06254379225717949, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13381856152046848, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1048.9375, + "completions/mean_terminated_length": 1018.86669921875, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.12878219554888723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5735691501443134, + "kl": 0.004791259765625, + "learning_rate": 9.977737708804483e-07, + "loss": -0.0257, + "num_tokens": 23004503.0, + "reward": 0.0, + "reward_std": 0.6283920407295227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0757364512420175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23252828103168138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1216.0625, + "completions/mean_terminated_length": 1087.0, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.12903225806451613, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1475012662406026, + "kl": 0.003551483154296875, + "learning_rate": 9.977345758750815e-07, + "loss": -0.007, + "num_tokens": 23056792.0, + "reward": 0.0, + "reward_std": 0.7265195846557617, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03817712759820832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061130912235576755, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1391.3125, + "completions/mean_terminated_length": 1282.625, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.12928232058014505, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.043521695374755, + "kl": 0.00350189208984375, + "learning_rate": 9.97695039710852e-07, + "loss": -0.0207, + "num_tokens": 23113229.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0532939434051514, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028252684360755398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14628059173608188, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1181.0, + "completions/max_terminated_length": 1181.0, + "completions/mean_length": 927.5, + "completions/mean_terminated_length": 927.5, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.12953238309577395, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.208715562167414, + "kl": 0.0044097900390625, + "learning_rate": 9.976551624178847e-07, + "loss": -0.0239, + "num_tokens": 23159757.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.016371488571167, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06651845734688337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08559105471049146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 929.375, + "completions/mean_terminated_length": 929.375, + "completions/min_length": 479.0, + "completions/min_terminated_length": 479.0, + "epoch": 0.12978244561140284, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.220806527774962, + "kl": 0.0042266845703125, + "learning_rate": 9.97614944026565e-07, + "loss": 0.0785, + "num_tokens": 23195579.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6541420817375183, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.37066069215113334, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3862686600075902, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 1125.75, + "completions/mean_terminated_length": 1125.75, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.13003250812703177, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3376504162329679, + "kl": 0.0006660223007202148, + "learning_rate": 9.975743845675381e-07, + "loss": -0.0029, + "num_tokens": 23228927.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0244802236557007, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01834543860842012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04244811969500902, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752093, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1084.0, + "completions/max_terminated_length": 1084.0, + "completions/mean_length": 952.8125, + "completions/mean_terminated_length": 952.8125, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.13028257064266066, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8824811847404597, + "kl": 0.0043792724609375, + "learning_rate": 9.975334840717086e-07, + "loss": -0.0363, + "num_tokens": 23269580.0, + "reward": 0.0, + "reward_std": 0.595811128616333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03344054515407356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059099287311281924, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1321.375, + "completions/mean_terminated_length": 1182.4444580078125, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.13053263315828958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.723176090660248, + "kl": 0.00321197509765625, + "learning_rate": 9.974922425702416e-07, + "loss": -0.064, + "num_tokens": 23325122.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7928065061569214, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11998653913539886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14480622019928574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1746424919657298, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1204.8125, + "completions/mean_terminated_length": 1070.6363525390625, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.13078269567391848, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2890412791112564, + "kl": 0.003620147705078125, + "learning_rate": 9.974506600945616e-07, + "loss": 0.0227, + "num_tokens": 23374735.0, + "reward": 0.0, + "reward_std": 0.5852730870246887, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04502768359252428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08034446087037876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1437590576856522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1214.3125, + "completions/mean_terminated_length": 1195.2667236328125, + "completions/min_length": 848.0, + "completions/min_terminated_length": 848.0, + "epoch": 0.13103275818954738, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9938212146233085, + "kl": 0.0029621124267578125, + "learning_rate": 9.974087366763534e-07, + "loss": -0.003, + "num_tokens": 23416420.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.20380209386348724, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031058452161353042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22719217277699225, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1107.25, + "completions/mean_terminated_length": 871.6000366210938, + "completions/min_length": 265.0, + "completions/min_terminated_length": 265.0, + "epoch": 0.1312828207051763, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.339340925372444, + "kl": 0.0019998550415039062, + "learning_rate": 9.973664723475606e-07, + "loss": -0.0618, + "num_tokens": 23458872.0, + "reward": 0.0, + "reward_std": 0.9507225751876831, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0023467484282928422, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05617452919007055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1282.4375, + "completions/mean_terminated_length": 1183.5455322265625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.1315328832208052, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3320544204680624, + "kl": 0.004306793212890625, + "learning_rate": 9.973238671403877e-07, + "loss": 0.0014, + "num_tokens": 23514063.0, + "reward": 0.0, + "reward_std": 0.7242544889450073, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06505329586370125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08862827804514926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1107.375, + "completions/mean_terminated_length": 1081.2000732421875, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.13178294573643412, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.33945749481696, + "kl": 0.003833770751953125, + "learning_rate": 9.972809210872985e-07, + "loss": 0.0757, + "num_tokens": 23550613.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0034337043762207, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0351623597900655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0880223462022602, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639732, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1121.875, + "completions/mean_terminated_length": 1096.666748046875, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.13203300825206302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.500662828643573, + "kl": 0.003406524658203125, + "learning_rate": 9.972376342210162e-07, + "loss": 0.0068, + "num_tokens": 23592331.0, + "reward": 0.0, + "reward_std": 0.8951210975646973, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04977295311129134, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057706204765003356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 1285.75, + "completions/mean_terminated_length": 1071.5, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.1322830707676919, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5647061465837604, + "kl": 0.0025768280029296875, + "learning_rate": 9.97194006574524e-07, + "loss": 0.0114, + "num_tokens": 23640343.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0383625030517578, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10299508465533097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10372111895133994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1450.4375, + "completions/mean_terminated_length": 1386.71435546875, + "completions/min_length": 1231.0, + "completions/min_terminated_length": 1231.0, + "epoch": 0.13253313328332084, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2271179332205, + "kl": 0.0017185211181640625, + "learning_rate": 9.97150038181065e-07, + "loss": -0.0111, + "num_tokens": 23696438.0, + "reward": -3.725290298461914e-08, + "reward_std": 0.9646656513214111, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07148722696924767, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04716470392431703, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1131.0, + "completions/max_terminated_length": 1131.0, + "completions/mean_length": 865.1875, + "completions/mean_terminated_length": 865.1875, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.13278319579894973, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1102467115811465, + "kl": 0.002620697021484375, + "learning_rate": 9.971057290741413e-07, + "loss": -0.021, + "num_tokens": 23739081.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7353893518447876, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.24456874698358683, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1302748715559959, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1069.0, + "completions/max_terminated_length": 1069.0, + "completions/mean_length": 934.0625, + "completions/mean_terminated_length": 934.0625, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.13303325831457866, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.17455593418590598, + "kl": 7.003545761108398e-05, + "learning_rate": 9.97061079287515e-07, + "loss": -0.004, + "num_tokens": 23773322.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.397393137216568, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007453036050616934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1241193113700527, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.03442651863295481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1127.9375, + "completions/mean_terminated_length": 1127.9375, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.13328332083020755, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3676925536596256, + "kl": 0.004070281982421875, + "learning_rate": 9.97016088855208e-07, + "loss": 0.0269, + "num_tokens": 23809433.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9639080166816711, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10865665454697258, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05682495738494908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1213.25, + "completions/mean_terminated_length": 990.2222290039062, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.13353338334583645, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.307428175416801, + "kl": 0.001972198486328125, + "learning_rate": 9.969707578115012e-07, + "loss": -0.0647, + "num_tokens": 23858317.0, + "reward": 0.0, + "reward_std": 0.7815545797348022, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21764423691912288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21310850602911072, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1294.1875, + "completions/mean_terminated_length": 1170.7000732421875, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.13378344586146537, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3399346928503992, + "kl": 0.004878997802734375, + "learning_rate": 9.969250861909357e-07, + "loss": 0.0017, + "num_tokens": 23906368.0, + "reward": 0.0, + "reward_std": 0.7112383246421814, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019296500722649667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16842763604043762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1009.875, + "completions/mean_terminated_length": 1009.875, + "completions/min_length": 485.0, + "completions/min_terminated_length": 485.0, + "epoch": 0.13403350837709427, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9000368289978224, + "kl": 0.00485992431640625, + "learning_rate": 9.968790740283117e-07, + "loss": 0.0315, + "num_tokens": 23959422.0, + "reward": 0.0, + "reward_std": 0.8126864433288574, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0795498922566296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08039801962101259, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 1071.0, + "completions/mean_terminated_length": 1009.71435546875, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.1342835708927232, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.819114010780478, + "kl": 0.0047149658203125, + "learning_rate": 9.968327213586883e-07, + "loss": 0.025, + "num_tokens": 24012518.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8958336114883423, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023314663322859634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09156881457072692, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1182.9375, + "completions/mean_terminated_length": 1161.800048828125, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.1345336334083521, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4353423294960286, + "kl": 0.0024242401123046875, + "learning_rate": 9.967860282173856e-07, + "loss": -0.0236, + "num_tokens": 24048685.0, + "reward": 0.0, + "reward_std": 0.9726213216781616, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050089389231970725, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11732758263588476, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1181.0, + "completions/max_terminated_length": 1181.0, + "completions/mean_length": 994.3125, + "completions/mean_terminated_length": 994.3125, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.13478369592398098, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.700523298709491, + "kl": 0.00476837158203125, + "learning_rate": 9.967389946399818e-07, + "loss": 0.0273, + "num_tokens": 24083050.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.691209077835083, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028380378070615542, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11230386441410885, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116196, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 955.0625, + "completions/mean_terminated_length": 955.0625, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.1350337584396099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.415628462337578, + "kl": 0.0016527175903320312, + "learning_rate": 9.966916206623148e-07, + "loss": 0.0132, + "num_tokens": 24127667.0, + "reward": 0.0, + "reward_std": 0.8168976306915283, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10014885864805739, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055416375908085645, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1149.0, + "completions/mean_terminated_length": 1149.0, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.1352838209552388, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7692585339746967, + "kl": 0.003269195556640625, + "learning_rate": 9.966439063204823e-07, + "loss": 0.0126, + "num_tokens": 24165891.0, + "reward": 0.0, + "reward_std": 0.7012863159179688, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1620799072389941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10434195738378764, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1529342632927262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1318.4375, + "completions/mean_terminated_length": 1136.875, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.13553388347086773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3292524200165974, + "kl": 0.0045623779296875, + "learning_rate": 9.965958516508408e-07, + "loss": -0.0183, + "num_tokens": 24220218.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9518120884895325, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019235466078862302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06160915797707672, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5833333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1108.1875, + "completions/mean_terminated_length": 1108.1875, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.13578394598649662, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2871643231558916, + "kl": 0.00414276123046875, + "learning_rate": 9.965474566900065e-07, + "loss": -0.0522, + "num_tokens": 24266549.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6842765808105469, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021172818013000458, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10198819760983971, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1364.8125, + "completions/mean_terminated_length": 1319.75, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.13603400850212552, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.789511286726518, + "kl": 0.0035247802734375, + "learning_rate": 9.964987214748547e-07, + "loss": -0.0146, + "num_tokens": 24319554.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0351877212524414, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09680033169312058, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10361497404751799, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1072.4375, + "completions/mean_terminated_length": 1072.4375, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.13628407101775444, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.38976753395478, + "kl": 0.004405975341796875, + "learning_rate": 9.9644964604252e-07, + "loss": -0.0468, + "num_tokens": 24371897.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9353458881378174, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09131942230263602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12526230885208448, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1205.3125, + "completions/mean_terminated_length": 1205.3125, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.13653413353338334, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6518842380268195, + "kl": 0.002796173095703125, + "learning_rate": 9.964002304303962e-07, + "loss": -0.0191, + "num_tokens": 24413566.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5421290993690491, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15006462637437845, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.32268579891831095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1300.1875, + "completions/mean_terminated_length": 1233.5833740234375, + "completions/min_length": 1087.0, + "completions/min_terminated_length": 1087.0, + "epoch": 0.13678419604901226, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0681037826413697, + "kl": 0.004119873046875, + "learning_rate": 9.963504746761365e-07, + "loss": -0.014, + "num_tokens": 24468681.0, + "reward": 0.0, + "reward_std": 0.4832967221736908, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05887365636360213, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0693438827910109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1248.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 894.25, + "completions/mean_terminated_length": 894.25, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.13703425856464116, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6940492753008909, + "kl": 0.0011034011840820312, + "learning_rate": 9.963003788176528e-07, + "loss": -0.0061, + "num_tokens": 24497597.0, + "reward": 0.0, + "reward_std": 0.7937939167022705, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0453132221386746, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.038657709752783376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033237, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1420.6875, + "completions/mean_terminated_length": 1288.5, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.13728432108027006, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6849965516277483, + "kl": 0.00347137451171875, + "learning_rate": 9.962499428931168e-07, + "loss": -0.0043, + "num_tokens": 24553128.0, + "reward": 0.0, + "reward_std": 0.8921339511871338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07809153334451181, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.254361150662776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1035.5, + "completions/mean_terminated_length": 1004.5333862304688, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.13753438359589898, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8277770448928927, + "kl": 0.00469970703125, + "learning_rate": 9.961991669409592e-07, + "loss": -0.0171, + "num_tokens": 24595016.0, + "reward": 0.0, + "reward_std": 0.7219088077545166, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.057327110095205844, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06795022697952832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15244914148902494, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1242.125, + "completions/mean_terminated_length": 1041.5555419921875, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.13778444611152788, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.112686840722075, + "kl": 0.003932952880859375, + "learning_rate": 9.961480509998691e-07, + "loss": -0.0033, + "num_tokens": 24643050.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0352966785430908, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06131913393434931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.138197577878526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1197.6875, + "completions/mean_terminated_length": 1096.916748046875, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.1380345086271568, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.02691785151223, + "kl": 0.004024505615234375, + "learning_rate": 9.960965951087954e-07, + "loss": -0.0503, + "num_tokens": 24684781.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6511217355728149, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05051363931545115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16999850827486235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1377060745318193, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1312.375, + "completions/mean_terminated_length": 1249.8333740234375, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.1382845711427857, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2766293438347596, + "kl": 0.00440216064453125, + "learning_rate": 9.960447993069457e-07, + "loss": 0.0315, + "num_tokens": 24737347.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0143721103668213, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021328223369904094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10142209898687021, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1000.1875, + "completions/mean_terminated_length": 1000.1875, + "completions/min_length": 565.0, + "completions/min_terminated_length": 565.0, + "epoch": 0.1385346336584146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.166134113773567, + "kl": 0.0028371810913085938, + "learning_rate": 9.959926636337867e-07, + "loss": -0.0691, + "num_tokens": 24783630.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6034929752349854, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04139483635413098, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07372706814115049, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1958362883937802, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1079.875, + "completions/mean_terminated_length": 1051.86669921875, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.13878469617404351, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4637932511512295, + "kl": 0.004791259765625, + "learning_rate": 9.959401881290441e-07, + "loss": 0.018, + "num_tokens": 24825780.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6338469982147217, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.091510283875925, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07778078511987636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1090.8125, + "completions/mean_terminated_length": 1032.357177734375, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.1390347586896724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2390083261154374, + "kl": 0.00408172607421875, + "learning_rate": 9.958873728327026e-07, + "loss": 0.0602, + "num_tokens": 24865945.0, + "reward": 0.0, + "reward_std": 0.950857400894165, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010147718052311714, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03629486993294452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857663, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1179.8125, + "completions/mean_terminated_length": 1179.8125, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.13928482120530133, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.232695255973004, + "kl": 0.002468109130859375, + "learning_rate": 9.958342177850053e-07, + "loss": -0.0044, + "num_tokens": 24906958.0, + "reward": 0.0, + "reward_std": 0.7562099695205688, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04666825561654362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07044630473052961, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1133.0, + "completions/max_terminated_length": 1133.0, + "completions/mean_length": 817.3125, + "completions/mean_terminated_length": 817.3125, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.13953488372093023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3780822199628484, + "kl": 0.0030574798583984375, + "learning_rate": 9.95780723026455e-07, + "loss": -0.0003, + "num_tokens": 24935107.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0564441680908203, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050732197243485475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13376181483720487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1197.375, + "completions/mean_terminated_length": 1127.5384521484375, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.13978494623655913, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6946693041356835, + "kl": 0.00519561767578125, + "learning_rate": 9.957268885978127e-07, + "loss": 0.0091, + "num_tokens": 24977217.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0016885995864868, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05427737298585947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09974795694798955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1954576775256058, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 1253.9375, + "completions/mean_terminated_length": 1007.875, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.14003500875218805, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6804992458078893, + "kl": 0.0026187896728515625, + "learning_rate": 9.956727145400987e-07, + "loss": 0.0315, + "num_tokens": 25030152.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.033705234527588, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10233559485186661, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11593315247241309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1199.9375, + "completions/mean_terminated_length": 1179.933349609375, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.14028507126781695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.409732493528164, + "kl": 0.004199981689453125, + "learning_rate": 9.956182008945913e-07, + "loss": -0.0277, + "num_tokens": 25082311.0, + "reward": 0.0, + "reward_std": 0.8880563974380493, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01819666146324469, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12021756013211483, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 1165.25, + "completions/mean_terminated_length": 904.888916015625, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.14053513378344587, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3134721556694164, + "kl": 0.0018768310546875, + "learning_rate": 9.955633477028285e-07, + "loss": 0.0361, + "num_tokens": 25132347.0, + "reward": 0.0, + "reward_std": 0.764082670211792, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06541902227810302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11478107525645823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 922.75, + "completions/mean_terminated_length": 922.75, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.14078519629907477, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.309089583556207, + "kl": 0.00397491455078125, + "learning_rate": 9.955081550066067e-07, + "loss": -0.0597, + "num_tokens": 25180399.0, + "reward": 0.0, + "reward_std": 0.9794434309005737, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04550869720855327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06990294111483113, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1286.3125, + "completions/mean_terminated_length": 1072.625, + "completions/min_length": 493.0, + "completions/min_terminated_length": 493.0, + "epoch": 0.14103525881470366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.150677184554094, + "kl": 0.005523681640625, + "learning_rate": 9.954526228479807e-07, + "loss": -0.0603, + "num_tokens": 25245116.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.03964364528656, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007493585802643346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08123553546011938, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1162.4375, + "completions/mean_terminated_length": 1162.4375, + "completions/min_length": 1085.0, + "completions/min_terminated_length": 1085.0, + "epoch": 0.14128532133033259, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9365313362037946, + "kl": 0.0031833648681640625, + "learning_rate": 9.953967512692642e-07, + "loss": -0.0202, + "num_tokens": 25295939.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8833714723587036, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0458350521777665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13075876054710983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1325.8125, + "completions/mean_terminated_length": 1314.2000732421875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.14153538384596148, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8836384064805487, + "kl": 0.003894805908203125, + "learning_rate": 9.953405403130292e-07, + "loss": 0.0144, + "num_tokens": 25345584.0, + "reward": 0.0, + "reward_std": 1.0566082000732422, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058775324435192663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18471972305782505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1123.0, + "completions/max_terminated_length": 1123.0, + "completions/mean_length": 923.3125, + "completions/mean_terminated_length": 923.3125, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.1417854463615904, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5524635665232993, + "kl": 0.004314422607421875, + "learning_rate": 9.952839900221071e-07, + "loss": -0.0105, + "num_tokens": 25398349.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0068414211273193, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033238919070296975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08078418942368708, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1048.625, + "completions/mean_terminated_length": 1048.625, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.1420355088772193, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9510585658030846, + "kl": 0.003040313720703125, + "learning_rate": 9.95227100439587e-07, + "loss": 0.0228, + "num_tokens": 25435783.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9196453094482422, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032018969558051655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07409422000270265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1162.0, + "completions/mean_length": 1195.0625, + "completions/mean_terminated_length": 890.125, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.14228557139284823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5359976756912497, + "kl": 0.002902984619140625, + "learning_rate": 9.95169871608817e-07, + "loss": -0.0412, + "num_tokens": 25486224.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.399206280708313, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026093002729264368, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11703476029988895, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0665276327996565, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 896.3125, + "completions/mean_terminated_length": 896.3125, + "completions/min_length": 433.0, + "completions/min_terminated_length": 433.0, + "epoch": 0.14253563390847712, + "frac_reward_zero_std": 0.0, + "grad_norm": 0.6361116281510751, + "kl": 0.00023704767227172852, + "learning_rate": 9.951123035734037e-07, + "loss": 0.0323, + "num_tokens": 25522277.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9116305112838745, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02984983329329311, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06625946801219092, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1039.375, + "completions/mean_terminated_length": 1008.666748046875, + "completions/min_length": 619.0, + "completions/min_terminated_length": 619.0, + "epoch": 0.14278569642410602, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2467636084671248, + "kl": 0.00412750244140625, + "learning_rate": 9.950543963772117e-07, + "loss": -0.0606, + "num_tokens": 25563483.0, + "reward": 0.0, + "reward_std": 0.7267336845397949, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09885122470742455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14097360869296927, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1024.5, + "completions/mean_terminated_length": 1024.5, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.14303575893973494, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2914154612774666, + "kl": 0.0021047592163085938, + "learning_rate": 9.949961500643646e-07, + "loss": 0.0037, + "num_tokens": 25597907.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9912971258163452, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04395243649345228, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10412586355533679, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1134.0, + "completions/max_terminated_length": 1134.0, + "completions/mean_length": 938.8125, + "completions/mean_terminated_length": 938.8125, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.14328582145536384, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7862250675424836, + "kl": 0.0020465850830078125, + "learning_rate": 9.94937564679244e-07, + "loss": 0.0278, + "num_tokens": 25628856.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0305507183074951, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007394273335734016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03519764206258481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1388.375, + "completions/mean_terminated_length": 1276.75, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.14353588397099276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6532925075869627, + "kl": 0.00384521484375, + "learning_rate": 9.948786402664901e-07, + "loss": 0.0094, + "num_tokens": 25685118.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8979947566986084, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04192963614681245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07954608703164971, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1199.25, + "completions/mean_terminated_length": 1062.5455322265625, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.14378594648662166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.655657127992163, + "kl": 0.003047943115234375, + "learning_rate": 9.948193768710014e-07, + "loss": -0.0361, + "num_tokens": 25744154.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8650979995727539, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06180243564601922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12526979820065356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1211.0, + "completions/mean_terminated_length": 1114.666748046875, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.14403600900225055, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.42445447034662, + "kl": 0.005401611328125, + "learning_rate": 9.947597745379345e-07, + "loss": -0.0388, + "num_tokens": 25786370.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6571464538574219, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10748562878513981, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11400581596626223, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1179.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 916.1875, + "completions/mean_terminated_length": 916.1875, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.14428607151787948, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0825517503100817, + "kl": 0.003143310546875, + "learning_rate": 9.946998333127048e-07, + "loss": 0.013, + "num_tokens": 25814341.0, + "reward": 0.0, + "reward_std": 1.0281645059585571, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039081850129278936, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06400686147925141, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13977495139343474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 967.25, + "completions/mean_terminated_length": 931.7333984375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.14453613403350837, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4353967150460343, + "kl": 0.002746105194091797, + "learning_rate": 9.946395532409847e-07, + "loss": -0.1216, + "num_tokens": 25853809.0, + "reward": 0.0, + "reward_std": 0.8325865268707275, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0036175414235028055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03717658094012606, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1158.0, + "completions/mean_length": 980.3125, + "completions/mean_terminated_length": 945.6666870117188, + "completions/min_length": 552.0, + "completions/min_terminated_length": 552.0, + "epoch": 0.1447861965491373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0231956377024822, + "kl": 0.0013604164123535156, + "learning_rate": 9.945789343687062e-07, + "loss": -0.0539, + "num_tokens": 25888886.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5303905010223389, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11137245585157231, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10491314620583367, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17469550228474265, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1032.875, + "completions/mean_terminated_length": 1032.875, + "completions/min_length": 336.0, + "completions/min_terminated_length": 336.0, + "epoch": 0.1450362590647662, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.731437015996924, + "kl": 0.0028047561645507812, + "learning_rate": 9.94517976742059e-07, + "loss": -0.1899, + "num_tokens": 25939732.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0577865839004517, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0019551052688948317, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09560779110575873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 959.375, + "completions/mean_terminated_length": 959.375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.1452863215803951, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7558961789763687, + "kl": 0.0044708251953125, + "learning_rate": 9.944566804074903e-07, + "loss": 0.0069, + "num_tokens": 25979818.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8938461542129517, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014206113351571641, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13742290095511187, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1105.0, + "completions/mean_length": 766.6875, + "completions/mean_terminated_length": 717.800048828125, + "completions/min_length": 433.0, + "completions/min_terminated_length": 433.0, + "epoch": 0.145536384096024, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.891588272137379, + "kl": 0.0024852752685546875, + "learning_rate": 9.943950454117062e-07, + "loss": 0.0471, + "num_tokens": 26014637.0, + "reward": 0.0, + "reward_std": 0.814212441444397, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056129555680764324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04704983112575642, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 1136.6875, + "completions/mean_terminated_length": 1136.6875, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.1457864466116529, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9998408850773086, + "kl": 0.0032501220703125, + "learning_rate": 9.943330718016707e-07, + "loss": 0.0191, + "num_tokens": 26058464.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0101852416992188, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06412615881552444, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10535771045228744, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1262.0625, + "completions/mean_terminated_length": 1228.071533203125, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.14603650912728183, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.328323714773342, + "kl": 0.003047943115234375, + "learning_rate": 9.942707596246051e-07, + "loss": 0.0333, + "num_tokens": 26116825.0, + "reward": 0.0, + "reward_std": 0.5319293737411499, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01161810999727462, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04306671396402434, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1225.5625, + "completions/mean_terminated_length": 951.125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.14628657164291073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.619677478771794, + "kl": 0.0030193328857421875, + "learning_rate": 9.942081089279895e-07, + "loss": -0.0266, + "num_tokens": 26165034.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.87298983335495, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046471669835930984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20636203573812778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 977.6875, + "completions/mean_terminated_length": 977.6875, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.14653663415853962, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.220787107532062, + "kl": 0.00439453125, + "learning_rate": 9.941451197595616e-07, + "loss": -0.0302, + "num_tokens": 26208333.0, + "reward": 0.0, + "reward_std": 0.6020612716674805, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18531909297563173, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.7327541154392073, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1128.125, + "completions/mean_terminated_length": 1103.3333740234375, + "completions/min_length": 600.0, + "completions/min_terminated_length": 600.0, + "epoch": 0.14678669667416855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2457708817696154, + "kl": 0.004261016845703125, + "learning_rate": 9.940817921673171e-07, + "loss": 0.0304, + "num_tokens": 26253895.0, + "reward": 0.0, + "reward_std": 0.8725961446762085, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013086077060335197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022231356887665546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 980.25, + "completions/mean_terminated_length": 576.0, + "completions/min_length": 253.0, + "completions/min_terminated_length": 253.0, + "epoch": 0.14703675918979744, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.498507769018676, + "kl": 0.00318145751953125, + "learning_rate": 9.940181261995095e-07, + "loss": 0.0, + "num_tokens": 26311211.0, + "reward": 0.0, + "reward_std": 0.7454190254211426, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.31330273231764627, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23243673674901752, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1205.4375, + "completions/mean_terminated_length": 1163.357177734375, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.14728682170542637, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9665250971037223, + "kl": 0.003551483154296875, + "learning_rate": 9.939541219046503e-07, + "loss": -0.0466, + "num_tokens": 26352458.0, + "reward": 0.0, + "reward_std": 0.7613252401351929, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07553269971278051, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07176045510715867, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1548595540529595, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1223.0, + "completions/mean_terminated_length": 1056.800048828125, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.14753688422105526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2411533607766883, + "kl": 0.0046844482421875, + "learning_rate": 9.938897793315084e-07, + "loss": 0.0397, + "num_tokens": 26394778.0, + "reward": 0.0, + "reward_std": 0.8644813895225525, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09843442321268221, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14027299039900942, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1164.125, + "completions/mean_terminated_length": 1164.125, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.14778694673668416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4692263744457907, + "kl": 0.00504302978515625, + "learning_rate": 9.938250985291109e-07, + "loss": -0.0381, + "num_tokens": 26444572.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0225211381912231, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20645432721223908, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.274591584743421, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 929.8125, + "completions/mean_terminated_length": 891.800048828125, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.14803700925231308, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7972668512002397, + "kl": 0.0054168701171875, + "learning_rate": 9.93760079546742e-07, + "loss": -0.0087, + "num_tokens": 26484681.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9700056314468384, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040576157017564764, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08501815210854557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1132.875, + "completions/mean_terminated_length": 1080.4285888671875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.14828707176794198, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4097449145653767, + "kl": 0.002651214599609375, + "learning_rate": 9.936947224339445e-07, + "loss": 0.0156, + "num_tokens": 26531951.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.060255765914917, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04640258890277966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05798845392173077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1206.125, + "completions/mean_terminated_length": 1186.533447265625, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.1485371342835709, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.063777562056498, + "kl": 0.004608154296875, + "learning_rate": 9.93629027240518e-07, + "loss": -0.0685, + "num_tokens": 26579633.0, + "reward": 0.0, + "reward_std": 0.9374594688415527, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.037962430449265255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06427881260361987, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1117.0, + "completions/mean_terminated_length": 1028.615478515625, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.1487871967991998, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4950427847069614, + "kl": 0.0024566650390625, + "learning_rate": 9.935629940165207e-07, + "loss": -0.014, + "num_tokens": 26623417.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0273593664169312, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012510003914418796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08237177102519577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027817, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1065.0625, + "completions/mean_terminated_length": 867.3636474609375, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.1490372593148287, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5274595574860577, + "kl": 0.0032291412353515625, + "learning_rate": 9.934966228122668e-07, + "loss": 0.0943, + "num_tokens": 26677954.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5483658313751221, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002146699977528561, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.008586799910114244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1098.0, + "completions/mean_length": 1277.125, + "completions/mean_terminated_length": 1054.25, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.14928732183045762, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.738098991200129, + "kl": 0.0017099380493164062, + "learning_rate": 9.934299136783295e-07, + "loss": -0.0059, + "num_tokens": 26730820.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4762175679206848, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015300273023360952, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06494253489770169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1014.8125, + "completions/mean_terminated_length": 1014.8125, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.14953738434608652, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.718365083094395, + "kl": 0.00577545166015625, + "learning_rate": 9.933628666655388e-07, + "loss": 0.0417, + "num_tokens": 26779593.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.928152322769165, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11743672713122211, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07033722209511302, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12531441937663718, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1079.375, + "completions/mean_terminated_length": 1051.3333740234375, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.14978744686171544, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9740495293641653, + "kl": 0.00469970703125, + "learning_rate": 9.932954818249825e-07, + "loss": 0.0267, + "num_tokens": 26829119.0, + "reward": 0.0, + "reward_std": 0.7347744703292847, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02744298852916007, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04224571654423721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1347.125, + "completions/mean_terminated_length": 1194.25, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.15003750937734434, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.742643059596914, + "kl": 0.003620147705078125, + "learning_rate": 9.932277592080053e-07, + "loss": 0.0195, + "num_tokens": 26880545.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9660696983337402, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11871302593919755, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10739628559183718, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 1072.5, + "completions/mean_terminated_length": 1072.5, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.15028757189297323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.641619348720334, + "kl": 0.0055694580078125, + "learning_rate": 9.931596988662098e-07, + "loss": -0.0091, + "num_tokens": 26925609.0, + "reward": 0.0, + "reward_std": 0.7729371786117554, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030149228068584166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09040469704860038, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1343.0625, + "completions/mean_terminated_length": 1186.125, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.15053763440860216, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.353906501683638, + "kl": 0.003086090087890625, + "learning_rate": 9.930913008514556e-07, + "loss": -0.0354, + "num_tokens": 26980098.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9765421152114868, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10208526792278656, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.101667802618831, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1186.0, + "completions/max_terminated_length": 1186.0, + "completions/mean_length": 820.1875, + "completions/mean_terminated_length": 820.1875, + "completions/min_length": 595.0, + "completions/min_terminated_length": 595.0, + "epoch": 0.15078769692423105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.488467656375712, + "kl": 0.004421234130859375, + "learning_rate": 9.930225652158596e-07, + "loss": 0.0659, + "num_tokens": 27011957.0, + "reward": 0.0, + "reward_std": 0.6544331908226013, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14358720158239366, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06657377165822227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1136.0, + "completions/max_terminated_length": 1136.0, + "completions/mean_length": 933.6875, + "completions/mean_terminated_length": 933.6875, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.15103775943985998, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7477346114525685, + "kl": 0.00545501708984375, + "learning_rate": 9.929534920117965e-07, + "loss": -0.043, + "num_tokens": 27048160.0, + "reward": 0.0, + "reward_std": 1.0017578601837158, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0797325115585829, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08706645098422826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1365582225578092, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 1126.0625, + "completions/mean_terminated_length": 1126.0625, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.15128782195548887, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5797413864694916, + "kl": 0.0010509490966796875, + "learning_rate": 9.928840812918975e-07, + "loss": 0.0233, + "num_tokens": 27085201.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0192571878433228, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021453923070155985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0675639266938992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 965.875, + "completions/mean_terminated_length": 965.875, + "completions/min_length": 556.0, + "completions/min_terminated_length": 556.0, + "epoch": 0.15153788447111777, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6772232791535533, + "kl": 0.0028228759765625, + "learning_rate": 9.92814333109051e-07, + "loss": -0.0141, + "num_tokens": 27124511.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.706709623336792, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013879156210530222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03549157950248385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1304.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 998.5, + "completions/mean_terminated_length": 998.5, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.1517879469867467, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5333483933333127, + "kl": 0.003772735595703125, + "learning_rate": 9.927442475164033e-07, + "loss": 0.0039, + "num_tokens": 27162951.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.657606303691864, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.31362971174929644, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.30576420844603475, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1175.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 979.0625, + "completions/mean_terminated_length": 979.0625, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.1520380095023756, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.576478994123704, + "kl": 0.002780914306640625, + "learning_rate": 9.926738245673572e-07, + "loss": -0.0498, + "num_tokens": 27211816.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9621330499649048, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022713177961768107, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08607318333401062, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 968.875, + "completions/mean_terminated_length": 893.0000610351562, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.1522880720180045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.888757711093492, + "kl": 0.003566741943359375, + "learning_rate": 9.926030643155722e-07, + "loss": 0.0224, + "num_tokens": 27246102.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8595496416091919, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002334967590519186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14802527907756138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1036.4375, + "completions/mean_terminated_length": 970.21435546875, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.1525381345336334, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.842808357985824, + "kl": 0.002964019775390625, + "learning_rate": 9.925319668149657e-07, + "loss": -0.0357, + "num_tokens": 27288373.0, + "reward": 9.313225746154785e-09, + "reward_std": 1.030226230621338, + "rewards/wordcountpos_reward_GEOBench/mean": 9.313225746154785e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0015906660863739897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07757599661144256, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1085.0, + "completions/max_terminated_length": 1085.0, + "completions/mean_length": 909.8125, + "completions/mean_terminated_length": 909.8125, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.1527881970492623, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3685662232356557, + "kl": 0.0014853477478027344, + "learning_rate": 9.924605321197114e-07, + "loss": -0.0127, + "num_tokens": 27332354.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0518014430999756, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032872784403799954, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050272620141529424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1311.8125, + "completions/mean_terminated_length": 1198.9000244140625, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.15303825956489123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2628510348619826, + "kl": 0.004638671875, + "learning_rate": 9.923887602842404e-07, + "loss": 0.0074, + "num_tokens": 27384495.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0563536882400513, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009219301265540979, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08895827449348724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16549588783075211, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1264.8125, + "completions/mean_terminated_length": 1231.21435546875, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.15328832208052012, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7554848601496826, + "kl": 0.0033016204833984375, + "learning_rate": 9.9231665136324e-07, + "loss": 0.0223, + "num_tokens": 27435572.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0172231197357178, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007619381068885907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01234126782864031, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1018.0, + "completions/max_terminated_length": 1018.0, + "completions/mean_length": 801.875, + "completions/mean_terminated_length": 801.875, + "completions/min_length": 550.0, + "completions/min_terminated_length": 550.0, + "epoch": 0.15353838459614905, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.135123101286076, + "kl": 0.0051727294921875, + "learning_rate": 9.922442054116554e-07, + "loss": -0.0118, + "num_tokens": 27471618.0, + "reward": 0.0, + "reward_std": 0.947390079498291, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03305191822773273, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08603439061053979, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852977, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1124.0, + "completions/max_terminated_length": 1124.0, + "completions/mean_length": 826.75, + "completions/mean_terminated_length": 826.75, + "completions/min_length": 409.0, + "completions/min_terminated_length": 409.0, + "epoch": 0.15378844711177794, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.688029200293511, + "kl": 0.002285003662109375, + "learning_rate": 9.921714224846878e-07, + "loss": -0.0652, + "num_tokens": 27498462.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5162820816040039, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016450761301142144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09502317435937908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1338.3125, + "completions/mean_terminated_length": 1241.300048828125, + "completions/min_length": 1086.0, + "completions/min_terminated_length": 1086.0, + "epoch": 0.15403850962740684, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.826000658028198, + "kl": 0.00395965576171875, + "learning_rate": 9.920983026377952e-07, + "loss": 0.0196, + "num_tokens": 27550235.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7362438440322876, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005204727197700034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058441448779411036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1385.125, + "completions/mean_terminated_length": 1270.25, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.15428857214303576, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2612264139500406, + "kl": 0.005279541015625, + "learning_rate": 9.920248459266924e-07, + "loss": -0.0235, + "num_tokens": 27606397.0, + "reward": 0.0, + "reward_std": 1.0330687761306763, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040554968042103186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10268012616288583, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1043.0, + "completions/max_terminated_length": 1043.0, + "completions/mean_length": 771.5625, + "completions/mean_terminated_length": 771.5625, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.15453863465866466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8674358928306938, + "kl": 0.0031185150146484375, + "learning_rate": 9.919510524073514e-07, + "loss": -0.0026, + "num_tokens": 27644134.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0234863758087158, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017360905430701703, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047458672809896456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05708992257184502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1125.625, + "completions/mean_terminated_length": 1125.625, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.15478869717429358, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5565550012425375, + "kl": 0.004669189453125, + "learning_rate": 9.91876922136e-07, + "loss": -0.0431, + "num_tokens": 27685160.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0333865880966187, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08157019656944037, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027080886054761298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1161.5, + "completions/mean_terminated_length": 1138.933349609375, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.15503875968992248, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.151974332517757, + "kl": 0.00402069091796875, + "learning_rate": 9.918024551691231e-07, + "loss": -0.0377, + "num_tokens": 27729960.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0480382442474365, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16037960356096487, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08534831570783727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1176.125, + "completions/mean_terminated_length": 1176.125, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.15528882220555137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3316506183621777, + "kl": 0.004608154296875, + "learning_rate": 9.917276515634622e-07, + "loss": -0.0085, + "num_tokens": 27768458.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.059020757675171, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031567110022971495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13765559428901972, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1295.25, + "completions/mean_terminated_length": 1227.0, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.1555388847211803, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.45802059023545, + "kl": 0.00580596923828125, + "learning_rate": 9.916525113760153e-07, + "loss": -0.0092, + "num_tokens": 27822494.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0356624126434326, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.052597004338363076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07102863121226026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1514742369000235, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1036.625, + "completions/mean_terminated_length": 1005.7333984375, + "completions/min_length": 616.0, + "completions/min_terminated_length": 616.0, + "epoch": 0.1557889472368092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.706807278034257, + "kl": 0.003887176513671875, + "learning_rate": 9.915770346640364e-07, + "loss": -0.0098, + "num_tokens": 27867728.0, + "reward": 0.0, + "reward_std": 0.5061618685722351, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3985647950550977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.29928728817649347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1122.8125, + "completions/mean_terminated_length": 1097.666748046875, + "completions/min_length": 620.0, + "completions/min_terminated_length": 620.0, + "epoch": 0.15603900975243812, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5174020318834827, + "kl": 0.005279541015625, + "learning_rate": 9.91501221485036e-07, + "loss": 0.0418, + "num_tokens": 27923557.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0303819179534912, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07456288133233142, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09960331774054818, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 993.9375, + "completions/mean_terminated_length": 960.2000732421875, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.15628907226806701, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7826105948322475, + "kl": 0.004657745361328125, + "learning_rate": 9.91425071896782e-07, + "loss": 0.0536, + "num_tokens": 27957220.0, + "reward": 0.0, + "reward_std": 0.8217940926551819, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06621773963468003, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1742045869664654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1197.0, + "completions/max_terminated_length": 1197.0, + "completions/mean_length": 927.3125, + "completions/mean_terminated_length": 927.3125, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.1565391347836959, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.373620365481586, + "kl": 0.0056915283203125, + "learning_rate": 9.913485859572973e-07, + "loss": 0.0296, + "num_tokens": 28002625.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0066461563110352, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056157804892149644, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07016392387478562, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1217.0, + "completions/max_terminated_length": 1217.0, + "completions/mean_length": 945.0625, + "completions/mean_terminated_length": 945.0625, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.15678919729932483, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.741710021942395, + "kl": 0.00508880615234375, + "learning_rate": 9.912717637248617e-07, + "loss": 0.0078, + "num_tokens": 28044330.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9504626393318176, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0359876874264131, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04465297783361459, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518174, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1241.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1114.8125, + "completions/mean_terminated_length": 1114.8125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.15703925981495373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5513194441492386, + "kl": 0.0020580291748046875, + "learning_rate": 9.911946052580113e-07, + "loss": -0.0127, + "num_tokens": 28075847.0, + "reward": 0.0, + "reward_std": 0.9391856789588928, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04977645299471967, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04053812781701323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1143.6875, + "completions/mean_terminated_length": 1143.6875, + "completions/min_length": 1049.0, + "completions/min_terminated_length": 1049.0, + "epoch": 0.15728932233058265, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7154101496018692, + "kl": 0.0012569427490234375, + "learning_rate": 9.911171106155384e-07, + "loss": -0.0105, + "num_tokens": 28117466.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8642572164535522, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06010961804013554, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15557865708139954, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1310.375, + "completions/mean_terminated_length": 1120.75, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.15753938484621155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.497447807441208, + "kl": 0.003017425537109375, + "learning_rate": 9.91039279856491e-07, + "loss": -0.0007, + "num_tokens": 28167792.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8962478041648865, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07596817864849001, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14650496114488484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1120.8125, + "completions/mean_terminated_length": 1120.8125, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.15778944736184047, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3827000604058073, + "kl": 0.004962921142578125, + "learning_rate": 9.909611130401737e-07, + "loss": -0.0188, + "num_tokens": 28222301.0, + "reward": 0.0, + "reward_std": 1.0509510040283203, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1334720144800754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13967360251276528, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 992.125, + "completions/mean_terminated_length": 874.923095703125, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.15803950987746937, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5402770892336592, + "kl": 0.00304412841796875, + "learning_rate": 9.908826102261471e-07, + "loss": -0.0676, + "num_tokens": 28269231.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8414763808250427, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042054245906612345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08875047925035809, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1261.9375, + "completions/mean_terminated_length": 1153.727294921875, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.15828957239309827, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9061399342344885, + "kl": 0.004383087158203125, + "learning_rate": 9.908037714742278e-07, + "loss": -0.0168, + "num_tokens": 28324334.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9979342818260193, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0295055283739288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07360985208796259, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1038.5625, + "completions/mean_terminated_length": 1007.800048828125, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.1585396349087272, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8022553331028246, + "kl": 0.0024547576904296875, + "learning_rate": 9.907245968444882e-07, + "loss": -0.0055, + "num_tokens": 28366455.0, + "reward": 0.0, + "reward_std": 0.7816459536552429, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04888571396386819, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0387487682663508, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1346.25, + "completions/mean_terminated_length": 1276.3636474609375, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.15878969742435609, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.164057336455896, + "kl": 0.00555419921875, + "learning_rate": 9.906450863972565e-07, + "loss": 0.0063, + "num_tokens": 28415035.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.049776315689087, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030612287865133283, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03682816465767381, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1058.6875, + "completions/mean_terminated_length": 1058.6875, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.159039759939985, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5634375244688257, + "kl": 0.00522613525390625, + "learning_rate": 9.905652401931176e-07, + "loss": -0.056, + "num_tokens": 28453646.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8575539588928223, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003961133210424909, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.008292740793764635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998878, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1290.8125, + "completions/mean_terminated_length": 1221.0833740234375, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.1592898224556139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5088738905931454, + "kl": 0.0054168701171875, + "learning_rate": 9.904850582929109e-07, + "loss": -0.0088, + "num_tokens": 28506827.0, + "reward": 0.0, + "reward_std": 0.8492498993873596, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01440574189747687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1348081702367106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 1036.4375, + "completions/mean_terminated_length": 881.9166870117188, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.1595398849712428, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.519755042712474, + "kl": 0.0015974044799804688, + "learning_rate": 9.90404540757733e-07, + "loss": -0.0758, + "num_tokens": 28552570.0, + "reward": 0.0, + "reward_std": 1.0252397060394287, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08991618431350884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10565991396834727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 1242.3125, + "completions/mean_terminated_length": 984.625, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.15978994748687173, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5936024782359937, + "kl": 0.00388336181640625, + "learning_rate": 9.903236876489352e-07, + "loss": 0.0029, + "num_tokens": 28600935.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9568347930908203, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04110840384394786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23128930185627536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 938.3125, + "completions/mean_terminated_length": 938.3125, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.16004001000250062, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.981009753058589, + "kl": 0.0032405853271484375, + "learning_rate": 9.902424990281253e-07, + "loss": -0.0037, + "num_tokens": 28639460.0, + "reward": 0.0, + "reward_std": 0.8215370774269104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008527577715944364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022343645791461725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 960.25, + "completions/mean_terminated_length": 960.25, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.16029007251812954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.508904484543214, + "kl": 0.00653076171875, + "learning_rate": 9.901609749571658e-07, + "loss": 0.0257, + "num_tokens": 28679512.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8503090739250183, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03168570932806421, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049323766336225504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 884.625, + "completions/mean_terminated_length": 884.625, + "completions/min_length": 436.0, + "completions/min_terminated_length": 436.0, + "epoch": 0.16054013503375844, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.092859077503906, + "kl": 0.00536346435546875, + "learning_rate": 9.900791154981757e-07, + "loss": 0.034, + "num_tokens": 28718914.0, + "reward": 0.0, + "reward_std": 0.679477334022522, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.033040165389410965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046874576273069235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1031.4375, + "completions/mean_terminated_length": 1000.2000732421875, + "completions/min_length": 582.0, + "completions/min_terminated_length": 582.0, + "epoch": 0.16079019754938734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9588146243709743, + "kl": 0.0060272216796875, + "learning_rate": 9.899969207135288e-07, + "loss": -0.0812, + "num_tokens": 28756769.0, + "reward": 0.0, + "reward_std": 0.9475013613700867, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02933121003727431, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06227279640697939, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1017.125, + "completions/mean_terminated_length": 1017.125, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.16104026006501626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.077724012930272, + "kl": 0.004322052001953125, + "learning_rate": 9.899143906658553e-07, + "loss": -0.0426, + "num_tokens": 28796139.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8259453177452087, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03784467636723547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09345927574362861, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1140.0, + "completions/mean_terminated_length": 1116.0, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.16129032258064516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0029827227585058, + "kl": 0.0041961669921875, + "learning_rate": 9.8983152541804e-07, + "loss": -0.0128, + "num_tokens": 28830635.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.807441234588623, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01741994365286357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08873955971301724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1125.5, + "completions/mean_terminated_length": 1125.5, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.16154038509627408, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.676086386525002, + "kl": 0.0032501220703125, + "learning_rate": 9.897483250332235e-07, + "loss": -0.0151, + "num_tokens": 28875395.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.790328323841095, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09379346818386113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22489303050319207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1051.5625, + "completions/mean_terminated_length": 1051.5625, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.16179044761190298, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7931834109679805, + "kl": 0.0049591064453125, + "learning_rate": 9.89664789574802e-07, + "loss": -0.0277, + "num_tokens": 28905164.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9757005572319031, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003999683517841895, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034710988306426695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1243.375, + "completions/mean_terminated_length": 1226.2667236328125, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.16204051012753187, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.743075513581391, + "kl": 0.00617218017578125, + "learning_rate": 9.895809191064265e-07, + "loss": -0.01, + "num_tokens": 28958290.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0277540683746338, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026626640807253056, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09436044604850641, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1115.5, + "completions/mean_terminated_length": 1060.571533203125, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.1622905726431608, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.530297147186997, + "kl": 0.00628662109375, + "learning_rate": 9.894967136920033e-07, + "loss": 0.0045, + "num_tokens": 28993458.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9571292400360107, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020095920331486848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06519459146638021, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1238.8125, + "completions/mean_terminated_length": 1201.5, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.1625406351587897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0724576164832738, + "kl": 0.0043792724609375, + "learning_rate": 9.894121733956946e-07, + "loss": -0.0443, + "num_tokens": 29040359.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.9077334403991699, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02847660567689668, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08529929549679423, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1404.6875, + "completions/mean_terminated_length": 1347.5, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.16279069767441862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0290959374877264, + "kl": 0.0018978118896484375, + "learning_rate": 9.89327298281917e-07, + "loss": 0.0306, + "num_tokens": 29093146.0, + "reward": 0.0, + "reward_std": 0.6023900508880615, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028824305467279196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05713678269956473, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1085.3125, + "completions/mean_terminated_length": 1085.3125, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.1630407601900475, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4308899662530297, + "kl": 0.005401611328125, + "learning_rate": 9.892420884153426e-07, + "loss": -0.0042, + "num_tokens": 29135799.0, + "reward": 0.0, + "reward_std": 0.8002134561538696, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08186471776886005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11120563958717151, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1316.3125, + "completions/mean_terminated_length": 1132.625, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.1632908227056764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.277405703716849, + "kl": 0.00574493408203125, + "learning_rate": 9.891565438608985e-07, + "loss": -0.0428, + "num_tokens": 29179076.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0661088228225708, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.000820199735267888, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.037798651738208705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0825518916489187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1068.25, + "completions/mean_terminated_length": 1068.25, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.16354088522130533, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7811095135981017, + "kl": 0.00612640380859375, + "learning_rate": 9.890706646837668e-07, + "loss": -0.0038, + "num_tokens": 29234904.0, + "reward": -2.60770320892334e-08, + "reward_std": 0.9818300008773804, + "rewards/wordcountpos_reward_GEOBench/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02413956992722924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03399196411179305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1075.0, + "completions/mean_terminated_length": 1075.0, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.16379094773693423, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.529640652746227, + "kl": 0.00597381591796875, + "learning_rate": 9.889844509493845e-07, + "loss": -0.0085, + "num_tokens": 29284880.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0676302909851074, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.061013736918769344, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10733455137992266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1297.6875, + "completions/mean_terminated_length": 1095.375, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.16404101025256315, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4407702253560237, + "kl": 0.003002166748046875, + "learning_rate": 9.888979027234438e-07, + "loss": 0.0178, + "num_tokens": 29337579.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.055144190788269, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07763762608950125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11455459986231567, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1240.0625, + "completions/mean_terminated_length": 1202.9285888671875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.16429107276819205, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8420677879553558, + "kl": 0.0041046142578125, + "learning_rate": 9.888110200718912e-07, + "loss": 0.0045, + "num_tokens": 29371868.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9852498769760132, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05218161354216687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09340211923766491, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1083.0625, + "completions/mean_terminated_length": 1083.0625, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.16454113528382094, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4334628044585296, + "kl": 0.004871368408203125, + "learning_rate": 9.887238030609288e-07, + "loss": -0.0509, + "num_tokens": 29421877.0, + "reward": 0.0, + "reward_std": 0.5072119832038879, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1451978351680613, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17516440052359825, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16771890063326086, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1067.5625, + "completions/mean_terminated_length": 1067.5625, + "completions/min_length": 632.0, + "completions/min_terminated_length": 632.0, + "epoch": 0.16479119779944987, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9771338179175766, + "kl": 0.003814697265625, + "learning_rate": 9.886362517570129e-07, + "loss": -0.0306, + "num_tokens": 29467814.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6987144947052002, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11514355113089288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12958933844527037, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 951.0, + "completions/mean_terminated_length": 951.0, + "completions/min_length": 470.0, + "completions/min_terminated_length": 470.0, + "epoch": 0.16504126031507876, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5949515184725715, + "kl": 0.00601959228515625, + "learning_rate": 9.885483662268545e-07, + "loss": 0.0394, + "num_tokens": 29514046.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9163892269134521, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0029266674768073176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05793194580824228, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1169.6875, + "completions/mean_terminated_length": 1093.4615478515625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.1652913228307077, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.493433205598436, + "kl": 0.00600433349609375, + "learning_rate": 9.884601465374197e-07, + "loss": 0.0749, + "num_tokens": 29550889.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7188822031021118, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1989227226176672, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22773317271269397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1066.8125, + "completions/mean_terminated_length": 729.888916015625, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.16554138534633658, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.269163133895344, + "kl": 0.0022735595703125, + "learning_rate": 9.883715927559288e-07, + "loss": 0.0416, + "num_tokens": 29595950.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9316185712814331, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.37903746212358674, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.47044156975082635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1225.4375, + "completions/mean_terminated_length": 1186.21435546875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.16579144786196548, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.960752195203952, + "kl": 0.00482940673828125, + "learning_rate": 9.882827049498569e-07, + "loss": 0.0017, + "num_tokens": 29643933.0, + "reward": 0.0, + "reward_std": 0.7641773223876953, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04348726517712267, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06265280183595295, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1704025734460517, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 920.4375, + "completions/mean_terminated_length": 881.800048828125, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.1660415103775944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.634142400089069, + "kl": 0.00496673583984375, + "learning_rate": 9.88193483186934e-07, + "loss": -0.023, + "num_tokens": 29679380.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0456384420394897, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00037113055023559396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03714300617598377, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1138.875, + "completions/mean_terminated_length": 1018.5, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.1662915728932233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.443513399073446, + "kl": 0.00356292724609375, + "learning_rate": 9.881039275351432e-07, + "loss": 0.0077, + "num_tokens": 29718010.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8946557641029358, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03951538808917681, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06458969969917827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04868644955601477, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1117.6875, + "completions/mean_terminated_length": 1092.2000732421875, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.16654163540885222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.386257180110526, + "kl": 0.0053863525390625, + "learning_rate": 9.880140380627238e-07, + "loss": -0.0501, + "num_tokens": 29755973.0, + "reward": -3.725290298461914e-09, + "reward_std": 0.935474157333374, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04224371022976678, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06549163307109823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1400.375, + "completions/mean_terminated_length": 1322.888916015625, + "completions/min_length": 1181.0, + "completions/min_terminated_length": 1181.0, + "epoch": 0.16679169792448112, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1756901012535357, + "kl": 0.0022563934326171875, + "learning_rate": 9.87923814838168e-07, + "loss": 0.0015, + "num_tokens": 29818611.0, + "reward": 0.0, + "reward_std": 0.8530460596084595, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10941166853682181, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11690888434010989, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1117.9375, + "completions/mean_terminated_length": 1117.9375, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.16704176044011002, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2592099396802072, + "kl": 0.0023860931396484375, + "learning_rate": 9.878332579302232e-07, + "loss": -0.0115, + "num_tokens": 29862554.0, + "reward": 0.0, + "reward_std": 0.9024142026901245, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21014769125074415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17662258116712992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1428.3125, + "completions/mean_terminated_length": 1308.8333740234375, + "completions/min_length": 1202.0, + "completions/min_terminated_length": 1202.0, + "epoch": 0.16729182295573894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.469363512289809, + "kl": 0.003520965576171875, + "learning_rate": 9.877423674078906e-07, + "loss": 0.0161, + "num_tokens": 29931535.0, + "reward": 0.0, + "reward_std": 0.7721085548400879, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3827319600957266, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.40603774745545673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1169.0, + "completions/max_terminated_length": 1169.0, + "completions/mean_length": 995.125, + "completions/mean_terminated_length": 995.125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.16754188547136784, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0381514004814263, + "kl": 0.005062103271484375, + "learning_rate": 9.876511433404257e-07, + "loss": 0.0199, + "num_tokens": 29969313.0, + "reward": 0.0, + "reward_std": 0.7515852451324463, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035480154493991174, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09702716016854289, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1401.3125, + "completions/mean_terminated_length": 1302.625, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.16779194798699676, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.771721243304098, + "kl": 0.0035858154296875, + "learning_rate": 9.875595857973385e-07, + "loss": 0.0112, + "num_tokens": 30025638.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9293917417526245, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044877416065130724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07515363534273296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518174, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1291.375, + "completions/mean_terminated_length": 1166.2000732421875, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.16804201050262565, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.133604148371529, + "kl": 0.002948760986328125, + "learning_rate": 9.874676948483926e-07, + "loss": -0.0328, + "num_tokens": 30074852.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9176056385040283, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13897860751907903, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15776637877836533, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1042.0625, + "completions/mean_terminated_length": 1042.0625, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.16829207301825455, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.322233890960668, + "kl": 0.005939483642578125, + "learning_rate": 9.873754705636058e-07, + "loss": -0.0348, + "num_tokens": 30118781.0, + "reward": 0.0, + "reward_std": 0.7571059465408325, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0206224889594218, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055598781570819045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1105.1875, + "completions/mean_terminated_length": 1105.1875, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.16854213553388347, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.794547181309379, + "kl": 0.0047760009765625, + "learning_rate": 9.872829130132502e-07, + "loss": -0.0123, + "num_tokens": 30157832.0, + "reward": 0.0, + "reward_std": 0.6994938850402832, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03639004517623455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07858153454817589, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1401.0, + "completions/mean_terminated_length": 1341.5999755859375, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.16879219804951237, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.629023208055695, + "kl": 0.003856658935546875, + "learning_rate": 9.871900222678514e-07, + "loss": 0.0114, + "num_tokens": 30206696.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0017818212509155, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013680306931102351, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1495709039859774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 789.0, + "completions/max_terminated_length": 789.0, + "completions/mean_length": 674.3125, + "completions/mean_terminated_length": 674.3125, + "completions/min_length": 534.0, + "completions/min_terminated_length": 534.0, + "epoch": 0.1690422605651413, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3404814672588823, + "kl": 0.0019521713256835938, + "learning_rate": 9.87096798398189e-07, + "loss": -0.018, + "num_tokens": 30234933.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9836888313293457, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08032299366088588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0776400677200827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1203.4375, + "completions/mean_terminated_length": 972.7777709960938, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.1692923230807702, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1025306474388605, + "kl": 0.004138946533203125, + "learning_rate": 9.870032414752965e-07, + "loss": -0.0387, + "num_tokens": 30284484.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5514116883277893, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06850130867668273, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0711795808567326, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1256.0, + "completions/mean_terminated_length": 1221.1429443359375, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.1695423855963991, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1917866770623475, + "kl": 0.0061798095703125, + "learning_rate": 9.869093515704612e-07, + "loss": -0.0407, + "num_tokens": 30338932.0, + "reward": 0.0, + "reward_std": 0.7379283905029297, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02965950269844044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07642510453156376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1119.75, + "completions/mean_terminated_length": 1094.4000244140625, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.169792448112028, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.991962734747193, + "kl": 0.0045166015625, + "learning_rate": 9.868151287552242e-07, + "loss": -0.0121, + "num_tokens": 30375704.0, + "reward": 0.0, + "reward_std": 1.028915286064148, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00616522023083188, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04281886360051347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1440.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1175.375, + "completions/mean_terminated_length": 1175.375, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.1700425106276569, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2842928330996464, + "kl": 0.00567626953125, + "learning_rate": 9.867205731013799e-07, + "loss": 0.01, + "num_tokens": 30414206.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9882248044013977, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0939150878539765, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09508390813294414, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1123.5625, + "completions/mean_terminated_length": 1123.5625, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.17029257314328583, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7222500204738935, + "kl": 0.0016469955444335938, + "learning_rate": 9.866256846809768e-07, + "loss": 0.0158, + "num_tokens": 30463975.0, + "reward": 0.0, + "reward_std": 1.0214999914169312, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09079340380405028, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16939628962137535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1164.375, + "completions/mean_terminated_length": 1164.375, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.17054263565891473, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7206983419666475, + "kl": 0.00705718994140625, + "learning_rate": 9.865304635663168e-07, + "loss": 0.0098, + "num_tokens": 30509709.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9880233407020569, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04849155069243096, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07147278676850169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 971.3125, + "completions/mean_terminated_length": 971.3125, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.17079269817454362, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.1276585452549546, + "kl": 0.0011903047561645508, + "learning_rate": 9.86434909829955e-07, + "loss": 0.0321, + "num_tokens": 30548754.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8285993933677673, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08691946433202313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05847831086265841, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1181.0, + "completions/max_terminated_length": 1181.0, + "completions/mean_length": 1036.0, + "completions/mean_terminated_length": 1036.0, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.17104276069017255, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2843843797518084, + "kl": 0.00481414794921875, + "learning_rate": 9.863390235447e-07, + "loss": 0.0084, + "num_tokens": 30589786.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.033435344696045, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02028996497537607, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08437440631279108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1088.8125, + "completions/mean_terminated_length": 993.923095703125, + "completions/min_length": 293.0, + "completions/min_terminated_length": 293.0, + "epoch": 0.17129282320580144, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2793686781646896, + "kl": 0.00429534912109375, + "learning_rate": 9.862428047836143e-07, + "loss": -0.0444, + "num_tokens": 30650927.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9694852828979492, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04914754737595524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0873668557582562, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1276.6875, + "completions/mean_terminated_length": 1202.25, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.17154288572143037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.624389590023649, + "kl": 0.00437164306640625, + "learning_rate": 9.861462536200134e-07, + "loss": 0.0059, + "num_tokens": 30709922.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0166723728179932, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027598994658975966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13808058368593346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1124.5, + "completions/mean_terminated_length": 1099.4666748046875, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.17179294823705926, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.812190127945849, + "kl": 0.004039764404296875, + "learning_rate": 9.860493701274659e-07, + "loss": 0.0216, + "num_tokens": 30746906.0, + "reward": 0.0, + "reward_std": 0.7034833431243896, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045603024633891605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09650554941416542, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1177.5625, + "completions/mean_terminated_length": 1177.5625, + "completions/min_length": 1050.0, + "completions/min_terminated_length": 1050.0, + "epoch": 0.17204301075268819, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.869080802539886, + "kl": 0.0048980712890625, + "learning_rate": 9.85952154379794e-07, + "loss": 0.0155, + "num_tokens": 30790027.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9264480471611023, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026064140605372165, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07937127243635701, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1015.25, + "completions/mean_terminated_length": 1015.25, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.17229307326831708, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.818938080152742, + "kl": 0.00374603271484375, + "learning_rate": 9.858546064510726e-07, + "loss": -0.0172, + "num_tokens": 30840063.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9846839904785156, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.23830658958696924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27565049715393297, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1311.0625, + "completions/mean_terminated_length": 1267.4615478515625, + "completions/min_length": 1047.0, + "completions/min_terminated_length": 1047.0, + "epoch": 0.17254313578394598, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6474332222836243, + "kl": 0.00438690185546875, + "learning_rate": 9.857567264156303e-07, + "loss": -0.0238, + "num_tokens": 30890184.0, + "reward": 0.0, + "reward_std": 0.9352826476097107, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0029094272665042423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.066602114697329, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1152.4375, + "completions/mean_terminated_length": 1129.2667236328125, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.1727931982995749, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1157815131134377, + "kl": 0.00878143310546875, + "learning_rate": 9.85658514348048e-07, + "loss": 0.0002, + "num_tokens": 30933767.0, + "reward": 0.0, + "reward_std": 0.5763227939605713, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13964249935652695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27632897586726657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818892, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1366.375, + "completions/mean_terminated_length": 1194.571533203125, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.1730432608152038, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.718270346121775, + "kl": 0.004337310791015625, + "learning_rate": 9.855599703231604e-07, + "loss": 0.0503, + "num_tokens": 30987653.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9104744791984558, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017844709162032544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05105769566044047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1237.1875, + "completions/mean_terminated_length": 1219.666748046875, + "completions/min_length": 934.0, + "completions/min_terminated_length": 934.0, + "epoch": 0.17329332333083272, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.198894425549332, + "kl": 0.006011962890625, + "learning_rate": 9.854610944160546e-07, + "loss": -0.0323, + "num_tokens": 31034808.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.058815360069275, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11360646273360653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08076168080712294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1276.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 927.9375, + "completions/mean_terminated_length": 927.9375, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.17354338584646162, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.972815108169843, + "kl": 0.0070648193359375, + "learning_rate": 9.85361886702071e-07, + "loss": -0.0601, + "num_tokens": 31079487.0, + "reward": 0.0, + "reward_std": 0.5373533964157104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015936173377582196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18611743482201726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1194.75, + "completions/mean_terminated_length": 1151.1429443359375, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.1737934483620905, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.55558835534266, + "kl": 0.00725555419921875, + "learning_rate": 9.852623472568022e-07, + "loss": 0.0364, + "num_tokens": 31122939.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0661258697509766, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07689038426951877, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06150411887084043, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15723301886761007, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1235.0, + "completions/max_terminated_length": 1235.0, + "completions/mean_length": 974.3125, + "completions/mean_terminated_length": 974.3125, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.17404351087771944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1529370373590595, + "kl": 0.004154205322265625, + "learning_rate": 9.851624761560941e-07, + "loss": 0.0026, + "num_tokens": 31161336.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8553957343101501, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01811492860740628, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055714430740861215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1436.9375, + "completions/mean_terminated_length": 1331.8333740234375, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.17429357339334833, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7953523086292615, + "kl": 0.0058441162109375, + "learning_rate": 9.850622734760452e-07, + "loss": 0.0069, + "num_tokens": 31224167.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6519376039505005, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13903277254184385, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21453607830637017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1338.125, + "completions/mean_terminated_length": 1338.125, + "completions/min_length": 1203.0, + "completions/min_terminated_length": 1203.0, + "epoch": 0.17454363590897726, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.712616496438221, + "kl": 0.00421905517578125, + "learning_rate": 9.849617392930068e-07, + "loss": 0.0117, + "num_tokens": 31276377.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0587139129638672, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005055912092982905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12702278654194613, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1254.75, + "completions/mean_terminated_length": 1143.272705078125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.17479369842460615, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.163042667831791, + "kl": 0.004421234130859375, + "learning_rate": 9.848608736835822e-07, + "loss": 0.0302, + "num_tokens": 31321117.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0649840831756592, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041606983906928866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06755416991499, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 953.25, + "completions/mean_terminated_length": 953.25, + "completions/min_length": 444.0, + "completions/min_terminated_length": 444.0, + "epoch": 0.17504376094023505, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.908095839864611, + "kl": 0.00411224365234375, + "learning_rate": 9.84759676724628e-07, + "loss": -0.0802, + "num_tokens": 31355809.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.522136926651001, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019748174711678234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09104546685919365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1301.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1045.3125, + "completions/mean_terminated_length": 1045.3125, + "completions/min_length": 619.0, + "completions/min_terminated_length": 619.0, + "epoch": 0.17529382345586397, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.591666200771933, + "kl": 0.00426483154296875, + "learning_rate": 9.846581484932528e-07, + "loss": -0.0401, + "num_tokens": 31392894.0, + "reward": 0.0, + "reward_std": 0.534155011177063, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05316756236899465, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0726793553559965, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1235.5, + "completions/mean_terminated_length": 1197.71435546875, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.17554388597149287, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.33644122348286, + "kl": 0.0058135986328125, + "learning_rate": 9.845562890668176e-07, + "loss": 0.0049, + "num_tokens": 31450750.0, + "reward": 0.0, + "reward_std": 0.9085119962692261, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22309452714284148, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.38574771053146345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14707015206910487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1341.0, + "completions/mean_terminated_length": 1245.5999755859375, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.1757939484871218, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.227106060739032, + "kl": 0.00322723388671875, + "learning_rate": 9.844540985229361e-07, + "loss": -0.0, + "num_tokens": 31494310.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5699238777160645, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1508945427556456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14346229544966718, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1259.375, + "completions/mean_terminated_length": 1259.375, + "completions/min_length": 1083.0, + "completions/min_terminated_length": 1083.0, + "epoch": 0.1760440110027507, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.253462945891019, + "kl": 0.002758026123046875, + "learning_rate": 9.843515769394738e-07, + "loss": -0.0069, + "num_tokens": 31540228.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9843798875808716, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06163698014050265, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049863524737517415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1453.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1205.9375, + "completions/mean_terminated_length": 1205.9375, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.17629407351837958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8814873325856762, + "kl": 0.00545501708984375, + "learning_rate": 9.842487243945492e-07, + "loss": 0.0035, + "num_tokens": 31588931.0, + "reward": 0.0, + "reward_std": 0.6505736708641052, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01791387181371239, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04934297907498743, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1086.375, + "completions/mean_terminated_length": 1058.800048828125, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.1765441360340085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.108353487872075, + "kl": 0.003223419189453125, + "learning_rate": 9.841455409665322e-07, + "loss": -0.0137, + "num_tokens": 31629433.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8794063925743103, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021193721119339693, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06329484574220316, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1202.25, + "completions/mean_terminated_length": 1159.71435546875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.1767941985496374, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0610646645902393, + "kl": 0.00540924072265625, + "learning_rate": 9.840420267340451e-07, + "loss": -0.0276, + "num_tokens": 31682573.0, + "reward": 0.0, + "reward_std": 1.0137903690338135, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16291065028060875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2062212746513509, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 981.3125, + "completions/mean_terminated_length": 981.3125, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.17704426106526633, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.848688748790686, + "kl": 0.00655364990234375, + "learning_rate": 9.839381817759626e-07, + "loss": 0.0114, + "num_tokens": 31725338.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9328153133392334, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09982396066492906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06645204800072713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1192569587999888, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1413.4375, + "completions/mean_terminated_length": 1302.1429443359375, + "completions/min_length": 1119.0, + "completions/min_terminated_length": 1119.0, + "epoch": 0.17729432358089522, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3561025080495215, + "kl": 0.0030364990234375, + "learning_rate": 9.838340061714106e-07, + "loss": 0.0105, + "num_tokens": 31789425.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6254506707191467, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006565889646817357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07596714452712161, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1388.6875, + "completions/mean_terminated_length": 1321.9000244140625, + "completions/min_length": 1137.0, + "completions/min_terminated_length": 1137.0, + "epoch": 0.17754438609652412, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8046370331543886, + "kl": 0.00446319580078125, + "learning_rate": 9.83729499999768e-07, + "loss": 0.0079, + "num_tokens": 31843796.0, + "reward": 0.0, + "reward_std": 0.7616355419158936, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13818015506413767, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14361407528177564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 1188.25, + "completions/mean_terminated_length": 1046.5455322265625, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.17779444861215304, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0982730519857293, + "kl": 0.0050048828125, + "learning_rate": 9.836246633406648e-07, + "loss": 0.0132, + "num_tokens": 31895432.0, + "reward": 0.0, + "reward_std": 0.7961058616638184, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06533079911582863, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08083713495988176, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1226.9375, + "completions/mean_terminated_length": 1135.916748046875, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.17804451112778194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3752944164484173, + "kl": 0.005615234375, + "learning_rate": 9.83519496273983e-07, + "loss": 0.0068, + "num_tokens": 31938887.0, + "reward": 0.0, + "reward_std": 0.4155014157295227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3274928960884062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4158114863581918, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1184.0, + "completions/mean_terminated_length": 1162.933349609375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.17829457364341086, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3357217199193814, + "kl": 0.0015869140625, + "learning_rate": 9.834139988798563e-07, + "loss": 0.0268, + "num_tokens": 31977047.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9382139444351196, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01341789437361911, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08403300045165756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000306, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1208.375, + "completions/mean_terminated_length": 1141.0770263671875, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.17854463615903976, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2672937742323067, + "kl": 0.00562286376953125, + "learning_rate": 9.833081712386704e-07, + "loss": -0.0564, + "num_tokens": 32024333.0, + "reward": 0.0, + "reward_std": 1.0414798259735107, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0016211836316102207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07353513354885861, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1060.5, + "completions/mean_terminated_length": 914.0, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.17879469867466866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.765077004650666, + "kl": 0.003887176513671875, + "learning_rate": 9.832020134310626e-07, + "loss": -0.0171, + "num_tokens": 32071789.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6076522469520569, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041212909837456325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2146395917981376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026001, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 907.8125, + "completions/mean_terminated_length": 907.8125, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.17904476119029758, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.504035988002599, + "kl": 0.003528594970703125, + "learning_rate": 9.830955255379213e-07, + "loss": -0.003, + "num_tokens": 32111978.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8405269384384155, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.037838144006038875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06952809972447198, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1080.0625, + "completions/mean_terminated_length": 1020.0714721679688, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.17929482370592648, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6841169181192956, + "kl": 0.00687408447265625, + "learning_rate": 9.829887076403867e-07, + "loss": 0.0296, + "num_tokens": 32164963.0, + "reward": 0.0, + "reward_std": 0.9161220192909241, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05316522083654115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1405488968566896, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 1025.0, + "completions/mean_terminated_length": 1025.0, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.1795448862215554, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7306076569183344, + "kl": 0.00701141357421875, + "learning_rate": 9.82881559819851e-07, + "loss": -0.0242, + "num_tokens": 32224459.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0101749897003174, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025130469579767615, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048259298805496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1139.8125, + "completions/mean_terminated_length": 1139.8125, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.1797949487371843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5671379229477815, + "kl": 0.00579071044921875, + "learning_rate": 9.827740821579568e-07, + "loss": -0.0171, + "num_tokens": 32280888.0, + "reward": 0.0, + "reward_std": 0.8606716990470886, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.033878012736833954, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025985164532050346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1265.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 982.875, + "completions/mean_terminated_length": 982.875, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.1800450112528132, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.550745915255253, + "kl": 0.0057830810546875, + "learning_rate": 9.826662747365983e-07, + "loss": 0.0223, + "num_tokens": 32311606.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0624669790267944, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01761827689511751, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08861577761771147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1270.0625, + "completions/mean_terminated_length": 1132.0999755859375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.18029507376844212, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.182133911220646, + "kl": 0.00598907470703125, + "learning_rate": 9.82558137637922e-07, + "loss": -0.0436, + "num_tokens": 32364559.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6436790227890015, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006640713385245342, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15859883731532523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1189459883650901, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 842.75, + "completions/mean_terminated_length": 842.75, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.180545136284071, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2906889609850847, + "kl": 0.0025787353515625, + "learning_rate": 9.824496709443237e-07, + "loss": -0.0042, + "num_tokens": 32404803.0, + "reward": 0.0, + "reward_std": 0.8606380224227905, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019272053517227637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09553023850777322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1192.0625, + "completions/mean_terminated_length": 1007.2999877929688, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.18079519879969994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.28878348139633, + "kl": 0.003215789794921875, + "learning_rate": 9.823408747384524e-07, + "loss": -0.077, + "num_tokens": 32447644.0, + "reward": 0.0, + "reward_std": 0.9321693181991577, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14377646701872282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05377312760414337, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1305.9375, + "completions/mean_terminated_length": 1155.0, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.18104526131532883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.320497941453975, + "kl": 0.00743865966796875, + "learning_rate": 9.822317491032067e-07, + "loss": -0.0357, + "num_tokens": 32505475.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.983498752117157, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09040107139900895, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10836169950908459, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1150.5, + "completions/mean_terminated_length": 1150.5, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.18129532383095773, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.2434582824422633, + "kl": 0.001117706298828125, + "learning_rate": 9.821222941217367e-07, + "loss": 0.0043, + "num_tokens": 32546491.0, + "reward": 0.0, + "reward_std": 0.5702938437461853, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053182843410490746, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14059950918082478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1052.8125, + "completions/mean_terminated_length": 903.75, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.18154538634658665, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9431295879849808, + "kl": 0.0020265579223632812, + "learning_rate": 9.820125098774436e-07, + "loss": -0.0422, + "num_tokens": 32578728.0, + "reward": 0.0, + "reward_std": 0.7283176779747009, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008942337384263198, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.021496138658367917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1198.625, + "completions/mean_terminated_length": 1198.625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.18179544886221555, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5474176169800433, + "kl": 0.0079345703125, + "learning_rate": 9.819023964539793e-07, + "loss": -0.0068, + "num_tokens": 32620562.0, + "reward": 0.0, + "reward_std": 0.9600921869277954, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11703831633134315, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1720625618889027, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 1500.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 1500.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.18204551137784447, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2193157397906775, + "kl": 0.0030670166015625, + "learning_rate": 9.817919539352466e-07, + "loss": 0.0001, + "num_tokens": 32678418.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9435696601867676, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009940261462111907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07133060786480624, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1150.75, + "completions/mean_terminated_length": 1100.857177734375, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.18229557389347337, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.242933170732582, + "kl": 0.01019287109375, + "learning_rate": 9.816811824053988e-07, + "loss": 0.0201, + "num_tokens": 32741190.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0447089672088623, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05750224395736582, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15834521967317955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1214.75, + "completions/mean_terminated_length": 1148.923095703125, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.18254563640910226, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7361219694233747, + "kl": 0.004390716552734375, + "learning_rate": 9.815700819488406e-07, + "loss": -0.0349, + "num_tokens": 32796730.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0677757263183594, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08281647264120397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055091018848567645, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1151.6875, + "completions/mean_terminated_length": 1128.4666748046875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.1827956989247312, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.748835390365423, + "kl": 0.00283050537109375, + "learning_rate": 9.814586526502263e-07, + "loss": -0.0144, + "num_tokens": 32855677.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.060882329940796, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03831599749094185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1052655506937671, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689035, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1367.6875, + "completions/mean_terminated_length": 1235.375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.18304576144036008, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.998704562710288, + "kl": 0.002902984619140625, + "learning_rate": 9.813468945944618e-07, + "loss": -0.0591, + "num_tokens": 32906320.0, + "reward": 0.0, + "reward_std": 0.7829744815826416, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0424517532520707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07791503468793178, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1403039029577766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1238.6875, + "completions/mean_terminated_length": 902.71435546875, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.183295823955989, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.819957059240688, + "kl": 0.0040130615234375, + "learning_rate": 9.812348078667027e-07, + "loss": -0.0256, + "num_tokens": 32950307.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9241190552711487, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06722534884069706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14160736354951808, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1190.1875, + "completions/mean_terminated_length": 1049.3636474609375, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.1835458864716179, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4498638495387435, + "kl": 0.0069580078125, + "learning_rate": 9.811223925523554e-07, + "loss": -0.0357, + "num_tokens": 33002398.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5122545957565308, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08607074641339456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09012592474278529, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1158.5, + "completions/mean_terminated_length": 1135.7333984375, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.1837959489872468, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3427020733436494, + "kl": 0.00605010986328125, + "learning_rate": 9.810096487370767e-07, + "loss": -0.0063, + "num_tokens": 33045406.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0441040992736816, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04200349924354796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0798675152124159, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.048686449556014755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1229.375, + "completions/mean_terminated_length": 1067.0, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.18404601150287572, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.231655287047318, + "kl": 0.00547027587890625, + "learning_rate": 9.808965765067735e-07, + "loss": -0.0234, + "num_tokens": 33096916.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0644407272338867, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2624126965101883, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.33578714643114277, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13924399049470285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1164.8125, + "completions/mean_terminated_length": 1142.4666748046875, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.18429607401850462, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3815497285710925, + "kl": 0.00667572021484375, + "learning_rate": 9.807831759476036e-07, + "loss": -0.0117, + "num_tokens": 33144425.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.042811632156372, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02033659139505253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04612926690576004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1060.125, + "completions/mean_terminated_length": 958.6154174804688, + "completions/min_length": 581.0, + "completions/min_terminated_length": 581.0, + "epoch": 0.18454613653413354, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0111274009719566, + "kl": 0.0052337646484375, + "learning_rate": 9.806694471459739e-07, + "loss": 0.0451, + "num_tokens": 33191035.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0592710971832275, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002183642725055812, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06974739434689296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081411, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1326.25, + "completions/mean_terminated_length": 1152.5, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.18479619904976244, + "frac_reward_zero_std": 0.0, + "grad_norm": 18.468800813859428, + "kl": 0.1162109375, + "learning_rate": 9.80555390188542e-07, + "loss": -0.0783, + "num_tokens": 33247943.0, + "reward": 0.0, + "reward_std": 0.7780887484550476, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18433175981064187, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26330838996163947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 970.5, + "completions/mean_terminated_length": 935.2000732421875, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.18504626156539133, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.416763838596435, + "kl": 0.005710601806640625, + "learning_rate": 9.804410051622157e-07, + "loss": -0.0414, + "num_tokens": 33286567.0, + "reward": 0.0, + "reward_std": 0.9934239983558655, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04075272404705677, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05015983631946827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965646, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1346.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1171.875, + "completions/mean_terminated_length": 1171.875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.18529632408102026, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8122015770909785, + "kl": 0.00487518310546875, + "learning_rate": 9.803262921541528e-07, + "loss": 0.0178, + "num_tokens": 33328541.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.47839343547821045, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.042920349209257906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07186635923476138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 1016.8125, + "completions/mean_terminated_length": 1016.8125, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.18554638659664915, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2145115794826284, + "kl": 0.0057525634765625, + "learning_rate": 9.802112512517602e-07, + "loss": -0.0011, + "num_tokens": 33364386.0, + "reward": 0.0, + "reward_std": 0.9832525253295898, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050124131177129626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21063680386644926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1133.0, + "completions/max_terminated_length": 1133.0, + "completions/mean_length": 929.3125, + "completions/mean_terminated_length": 929.3125, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.18579644911227808, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3057530815007294, + "kl": 0.0030193328857421875, + "learning_rate": 9.800958825426957e-07, + "loss": 0.0437, + "num_tokens": 33393671.0, + "reward": 0.0, + "reward_std": 0.904248833656311, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043688346469297965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07085727903795395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1011.5625, + "completions/mean_terminated_length": 848.75, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.18604651162790697, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2738937180284684, + "kl": 0.003101348876953125, + "learning_rate": 9.799801861148664e-07, + "loss": -0.0206, + "num_tokens": 33427632.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0394505262374878, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09399952783546645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11644827922446022, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1137.5, + "completions/mean_terminated_length": 1085.71435546875, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.18629657414353587, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0650345743673544, + "kl": 0.00585174560546875, + "learning_rate": 9.79864162056429e-07, + "loss": -0.0025, + "num_tokens": 33470136.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9372832775115967, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006921612982398085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.021359135129951135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1121.8125, + "completions/mean_terminated_length": 1121.8125, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.1865466366591648, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.122625548014598, + "kl": 0.00438690185546875, + "learning_rate": 9.797478104557898e-07, + "loss": 0.0018, + "num_tokens": 33513957.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4723026752471924, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13464675386404207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0747943558290234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1081.0, + "completions/mean_terminated_length": 1081.0, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.1867966991747937, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.34960657208965, + "kl": 0.00260162353515625, + "learning_rate": 9.796311314016053e-07, + "loss": -0.0287, + "num_tokens": 33549557.0, + "reward": -5.21540641784668e-08, + "reward_std": 1.0570249557495117, + "rewards/wordcountpos_reward_GEOBench/mean": -5.21540641784668e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08874766055093594, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11555375335357478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1017.0, + "completions/max_terminated_length": 1017.0, + "completions/mean_length": 950.4375, + "completions/mean_terminated_length": 950.4375, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.1870467616904226, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5741706699359104, + "kl": 0.0019321441650390625, + "learning_rate": 9.795141249827807e-07, + "loss": -0.0038, + "num_tokens": 33581884.0, + "reward": 0.0, + "reward_std": 0.9222838878631592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041798627727187, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08488795145417802, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1050.875, + "completions/mean_terminated_length": 1020.9334106445312, + "completions/min_length": 625.0, + "completions/min_terminated_length": 625.0, + "epoch": 0.1872968242060515, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.260750225835077, + "kl": 0.005340576171875, + "learning_rate": 9.793967912884713e-07, + "loss": -0.0666, + "num_tokens": 33630210.0, + "reward": 0.0, + "reward_std": 0.4019111096858978, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.082462785160942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13403575766035725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1045.5, + "completions/mean_terminated_length": 940.6154174804688, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.18754688672168043, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6077526682443324, + "kl": 0.00556182861328125, + "learning_rate": 9.792791304080812e-07, + "loss": -0.0063, + "num_tokens": 33673594.0, + "reward": 0.0, + "reward_std": 0.7919954061508179, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0173807072481657, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1464043901598514, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1116.75, + "completions/mean_terminated_length": 1091.2000732421875, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.18779694923730933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4009377519527804, + "kl": 0.00592041015625, + "learning_rate": 9.791611424312642e-07, + "loss": 0.0355, + "num_tokens": 33725566.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0569311380386353, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0463683801220161, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08293067812961104, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1394.0, + "completions/mean_terminated_length": 1160.800048828125, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.18804701175293823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.59931882175537, + "kl": 0.00464630126953125, + "learning_rate": 9.790428274479232e-07, + "loss": -0.0172, + "num_tokens": 33773046.0, + "reward": 0.0, + "reward_std": 0.8995012044906616, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04364249190376477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13247925550449974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1094.875, + "completions/mean_terminated_length": 1067.86669921875, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.18829707426856715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.579477653679744, + "kl": 0.00666046142578125, + "learning_rate": 9.789241855482101e-07, + "loss": -0.0109, + "num_tokens": 33810436.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7825042009353638, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09746143756698154, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12352679057806955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05288001793018134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1082.1875, + "completions/mean_terminated_length": 1082.1875, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.18854713678419605, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8734100753719083, + "kl": 0.0027713775634765625, + "learning_rate": 9.788052168225263e-07, + "loss": -0.0472, + "num_tokens": 33853799.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8815306425094604, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07636630696159298, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08869742462224303, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1267.0625, + "completions/mean_terminated_length": 1233.7857666015625, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.18879719929982497, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.036091211979058, + "kl": 0.00536346435546875, + "learning_rate": 9.78685921361522e-07, + "loss": -0.0276, + "num_tokens": 33906104.0, + "reward": 0.0, + "reward_std": 0.8688238859176636, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03977344691935024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027909715061306753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 902.6875, + "completions/mean_terminated_length": 902.6875, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.18904726181545387, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3338262592899786, + "kl": 0.00505828857421875, + "learning_rate": 9.785662992560965e-07, + "loss": -0.0528, + "num_tokens": 33935819.0, + "reward": 0.0, + "reward_std": 0.7001897096633911, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16521999616251332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2993398128396873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1036.125, + "completions/mean_terminated_length": 1036.125, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.18929732433108276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6780908940070804, + "kl": 0.003551483154296875, + "learning_rate": 9.784463505973978e-07, + "loss": -0.0459, + "num_tokens": 33973661.0, + "reward": 0.0, + "reward_std": 0.7571854591369629, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05177551332944912, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1280877652417197, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1036.0, + "completions/max_terminated_length": 1036.0, + "completions/mean_length": 756.125, + "completions/mean_terminated_length": 756.125, + "completions/min_length": 493.0, + "completions/min_terminated_length": 493.0, + "epoch": 0.18954738684671169, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.660220980204215, + "kl": 0.00688934326171875, + "learning_rate": 9.783260754768224e-07, + "loss": 0.0482, + "num_tokens": 34009159.0, + "reward": 0.0, + "reward_std": 0.801722526550293, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030484680167434323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07098127912457575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722953, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1123.6875, + "completions/mean_terminated_length": 952.6364135742188, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.18979744936234058, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8857401735743746, + "kl": 0.00478363037109375, + "learning_rate": 9.782054739860163e-07, + "loss": -0.0159, + "num_tokens": 34055578.0, + "reward": 0.0, + "reward_std": 0.7229069471359253, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2354839586385386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26296654418317406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0642621944040945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1342.5, + "completions/mean_terminated_length": 1220.0, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.1900475118779695, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8581999003436978, + "kl": 0.00440216064453125, + "learning_rate": 9.78084546216874e-07, + "loss": -0.0019, + "num_tokens": 34109418.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.999096155166626, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02402238429898502, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05047151105415464, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1520233900132184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1203.5, + "completions/mean_terminated_length": 1203.5, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.1902975743935984, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.585307131556703, + "kl": 0.00785064697265625, + "learning_rate": 9.77963292261538e-07, + "loss": 0.0106, + "num_tokens": 34155186.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9448951482772827, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12624460748158114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1263113780258288, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1244.0625, + "completions/mean_terminated_length": 1244.0625, + "completions/min_length": 1056.0, + "completions/min_terminated_length": 1056.0, + "epoch": 0.1905476369092273, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.336458640011638, + "kl": 0.00815582275390625, + "learning_rate": 9.778417122124002e-07, + "loss": -0.0216, + "num_tokens": 34205779.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9397013187408447, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08958149201369565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08981750603246451, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1229.125, + "completions/mean_terminated_length": 1166.615478515625, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.19079769942485622, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.823257904779279, + "kl": 0.00473785400390625, + "learning_rate": 9.777198061621001e-07, + "loss": 0.0136, + "num_tokens": 34253637.0, + "reward": 0.0, + "reward_std": 0.898926854133606, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0015722100971268783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07436289307193161, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 987.5625, + "completions/mean_terminated_length": 953.4000244140625, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.19104776194048512, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4779493444108303, + "kl": 0.00440216064453125, + "learning_rate": 9.775975742035264e-07, + "loss": 0.0166, + "num_tokens": 34287350.0, + "reward": 0.0, + "reward_std": 0.6220771074295044, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05113912029521954, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031110641376222836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1214114522635354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1384.3125, + "completions/mean_terminated_length": 1357.615478515625, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.19129782445611404, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8196497984646303, + "kl": 0.00301361083984375, + "learning_rate": 9.774750164298158e-07, + "loss": -0.0255, + "num_tokens": 34347483.0, + "reward": 0.0, + "reward_std": 0.7921708226203918, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08389881263928206, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19504925008106058, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1272.3125, + "completions/mean_terminated_length": 1219.769287109375, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.19154788697174294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.796661759974169, + "kl": 0.0064239501953125, + "learning_rate": 9.77352132934353e-07, + "loss": -0.0928, + "num_tokens": 34398400.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8572706580162048, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04189881519089716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09426900176535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1296.0, + "completions/max_terminated_length": 1296.0, + "completions/mean_length": 1012.0, + "completions/mean_terminated_length": 1012.0, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.19179794948737183, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.648599515827048, + "kl": 0.00684356689453125, + "learning_rate": 9.772289238107714e-07, + "loss": -0.0585, + "num_tokens": 34436456.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6676443219184875, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04432162729021421, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05077108140198264, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 941.5625, + "completions/mean_terminated_length": 941.5625, + "completions/min_length": 561.0, + "completions/min_terminated_length": 561.0, + "epoch": 0.19204801200300076, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.42710037829366, + "kl": 0.005767822265625, + "learning_rate": 9.771053891529522e-07, + "loss": -0.0104, + "num_tokens": 34466265.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6699692010879517, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1519624710005487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1081.625, + "completions/mean_terminated_length": 1053.7333984375, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.19229807451862965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0950095698804243, + "kl": 0.00524139404296875, + "learning_rate": 9.769815290550247e-07, + "loss": 0.0098, + "num_tokens": 34509563.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9110170602798462, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058465818928021634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15616305960487858, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1103.0625, + "completions/mean_terminated_length": 1011.4615478515625, + "completions/min_length": 602.0, + "completions/min_terminated_length": 602.0, + "epoch": 0.19254813703425858, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9463316693169084, + "kl": 0.00342559814453125, + "learning_rate": 9.76857343611366e-07, + "loss": 0.015, + "num_tokens": 34553700.0, + "reward": 0.0, + "reward_std": 0.2281189262866974, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2392460331618483, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24302124733962108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1248.6875, + "completions/mean_terminated_length": 1164.916748046875, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.19279819954988747, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.08684980642056, + "kl": 0.006317138671875, + "learning_rate": 9.767328329166014e-07, + "loss": 0.0259, + "num_tokens": 34602287.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0022685527801514, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0925098613439011, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05833707694707505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1002.8125, + "completions/mean_terminated_length": 1002.8125, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.19304826206551637, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7688446055324336, + "kl": 0.00395965576171875, + "learning_rate": 9.76607997065604e-07, + "loss": -0.0077, + "num_tokens": 34643060.0, + "reward": 0.0, + "reward_std": 0.8697834014892578, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05352356193185525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1368391119788751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1319.875, + "completions/mean_terminated_length": 1259.8333740234375, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.1932983245811453, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9255521178435453, + "kl": 0.0050811767578125, + "learning_rate": 9.764828361534945e-07, + "loss": -0.0256, + "num_tokens": 34700114.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9583911895751953, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0937513035346663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05214142643929856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1171.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 996.625, + "completions/mean_terminated_length": 996.625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.1935483870967742, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4070277813551235, + "kl": 0.0075225830078125, + "learning_rate": 9.763573502756414e-07, + "loss": 0.0182, + "num_tokens": 34744068.0, + "reward": 0.0, + "reward_std": 0.8290859460830688, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07904164124028745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08867501583133151, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1136.4375, + "completions/mean_terminated_length": 1084.5, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.1937984496124031, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.374852555016247, + "kl": 0.003948211669921875, + "learning_rate": 9.762315395276605e-07, + "loss": 0.0348, + "num_tokens": 34792171.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8454338908195496, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019708511131110037, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05723644134720568, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901858, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1265.6875, + "completions/mean_terminated_length": 1265.6875, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.194048512128032, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.455640713456896, + "kl": 0.00699615478515625, + "learning_rate": 9.76105404005416e-07, + "loss": -0.0119, + "num_tokens": 34838414.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5210832357406616, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.27378134135982485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28662402148228705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1311.8125, + "completions/mean_terminated_length": 1226.272705078125, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.1942985746436609, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5819744549931194, + "kl": 0.0087738037109375, + "learning_rate": 9.75978943805018e-07, + "loss": -0.0946, + "num_tokens": 34890035.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.966614305973053, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05988217910005238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08940491656366094, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1122.8125, + "completions/mean_terminated_length": 1122.8125, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.19454863715928983, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.448770982069352, + "kl": 0.001934051513671875, + "learning_rate": 9.758521590228259e-07, + "loss": 0.0496, + "num_tokens": 34926536.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4485316872596741, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3233125584511452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2168428692609099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16638865702079933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 955.875, + "completions/mean_terminated_length": 955.875, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.19479869967491872, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2976615635966047, + "kl": 0.004756927490234375, + "learning_rate": 9.757250497554451e-07, + "loss": -0.0201, + "num_tokens": 34969174.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0640275478363037, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07778321113687948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10352705056746363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1175.9375, + "completions/mean_terminated_length": 1028.6363525390625, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.19504876219054765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6966983513298572, + "kl": 0.004276275634765625, + "learning_rate": 9.755976160997289e-07, + "loss": -0.0132, + "num_tokens": 35026149.0, + "reward": 0.0, + "reward_std": 0.9740884304046631, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02139420407473786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022310670092147947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1472.5, + "completions/mean_terminated_length": 1412.0, + "completions/min_length": 1222.0, + "completions/min_terminated_length": 1222.0, + "epoch": 0.19529882470617654, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7184128411751596, + "kl": 0.00482177734375, + "learning_rate": 9.75469858152777e-07, + "loss": 0.001, + "num_tokens": 35086077.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7493516802787781, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021356019889026398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15075920317301078, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1174.6875, + "completions/mean_terminated_length": 1099.615478515625, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.19554888722180544, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.905215181043459, + "kl": 0.005100250244140625, + "learning_rate": 9.753417760119372e-07, + "loss": -0.0508, + "num_tokens": 35129544.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9570135474205017, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031115357620785922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06245959906751957, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1255.4375, + "completions/mean_terminated_length": 1239.1334228515625, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.19579894973743436, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.256226471059028, + "kl": 0.01361083984375, + "learning_rate": 9.752133697748034e-07, + "loss": -0.0362, + "num_tokens": 35183487.0, + "reward": 0.0, + "reward_std": 0.6491416692733765, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.037269011827991846, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08178369493522963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1182.6875, + "completions/mean_terminated_length": 1182.6875, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.19604901225306326, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.390657488638133, + "kl": 0.003742218017578125, + "learning_rate": 9.750846395392174e-07, + "loss": -0.0245, + "num_tokens": 35230002.0, + "reward": 0.0, + "reward_std": 0.9859509468078613, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12500342111521479, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11580758524749729, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857663, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 942.625, + "completions/mean_terminated_length": 942.625, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.19629907476869218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3987331222182817, + "kl": 0.0058746337890625, + "learning_rate": 9.74955585403267e-07, + "loss": -0.0277, + "num_tokens": 35275612.0, + "reward": 0.0, + "reward_std": 0.7244665622711182, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038334246468655804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15936062070843363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1153.0, + "completions/max_terminated_length": 1153.0, + "completions/mean_length": 1035.9375, + "completions/mean_terminated_length": 1035.9375, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.19654913728432108, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2083676799809377, + "kl": 0.00504302978515625, + "learning_rate": 9.748262074652871e-07, + "loss": 0.0267, + "num_tokens": 35314019.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.7835050821304321, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.048525094829570875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058974631431651235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1302.1875, + "completions/mean_terminated_length": 1289.0001220703125, + "completions/min_length": 1008.0, + "completions/min_terminated_length": 1008.0, + "epoch": 0.19679919979994998, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1798868968548346, + "kl": 0.0066680908203125, + "learning_rate": 9.746965058238597e-07, + "loss": -0.0545, + "num_tokens": 35362958.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9166634678840637, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00430408009668512, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0197332004346436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1271.0, + "completions/mean_length": 1316.875, + "completions/mean_terminated_length": 1081.4285888671875, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.1970492623155789, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6503387178891087, + "kl": 0.0055084228515625, + "learning_rate": 9.74566480577813e-07, + "loss": -0.025, + "num_tokens": 35408868.0, + "reward": 0.0, + "reward_std": 0.9682281017303467, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0036836197797909306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09225086343595418, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1437590576856522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1445.25, + "completions/mean_terminated_length": 1281.0, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.1972993248312078, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1338051832833966, + "kl": 0.006336212158203125, + "learning_rate": 9.744361318262216e-07, + "loss": 0.0179, + "num_tokens": 35465784.0, + "reward": 0.0, + "reward_std": 0.7411366701126099, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19950779608251143, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20747878100680947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1018350154434631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1230.8125, + "completions/mean_terminated_length": 1230.8125, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.19754938734683672, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.395292977069102, + "kl": 0.003322601318359375, + "learning_rate": 9.743054596684074e-07, + "loss": -0.0329, + "num_tokens": 35517165.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8714635372161865, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007320954988949741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08264734535517476, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1041.125, + "completions/mean_terminated_length": 1041.125, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.19779944986246562, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4033088962791496, + "kl": 0.0065460205078125, + "learning_rate": 9.74174464203938e-07, + "loss": 0.0028, + "num_tokens": 35548911.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0551389455795288, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044343332770688394, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10528654935910003, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1205.125, + "completions/mean_terminated_length": 1137.0770263671875, + "completions/min_length": 561.0, + "completions/min_terminated_length": 561.0, + "epoch": 0.1980495123780945, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.775172951447881, + "kl": 0.003673553466796875, + "learning_rate": 9.740431455326277e-07, + "loss": -0.0146, + "num_tokens": 35606353.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4133055806159973, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02393766554726974, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04840274681681225, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18358568490953675, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1262.9375, + "completions/mean_terminated_length": 1120.7000732421875, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.19829957489372343, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.910847284276466, + "kl": 0.0073699951171875, + "learning_rate": 9.73911503754537e-07, + "loss": 0.0516, + "num_tokens": 35662312.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0245856046676636, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0613803610860164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0406555594847558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505564, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1191.4375, + "completions/mean_terminated_length": 1120.2308349609375, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.19854963740935233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8650181874549507, + "kl": 0.00533294677734375, + "learning_rate": 9.737795389699721e-07, + "loss": -0.0663, + "num_tokens": 35707367.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8280187845230103, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06189318236616637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04722609146231109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1243.1875, + "completions/mean_terminated_length": 1243.1875, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.19879969992498125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.101025455118715, + "kl": 0.00661468505859375, + "learning_rate": 9.736472512794864e-07, + "loss": -0.0219, + "num_tokens": 35753282.0, + "reward": 0.0, + "reward_std": 0.8394339680671692, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024696497875071437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060198756738365534, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1134.8125, + "completions/mean_terminated_length": 1082.6429443359375, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.19904976244061015, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2961792034457194, + "kl": 0.00566864013671875, + "learning_rate": 9.735146407838784e-07, + "loss": -0.0287, + "num_tokens": 35799439.0, + "reward": 0.0, + "reward_std": 0.6710816621780396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08245961507336123, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1106415889001064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1197.5, + "completions/mean_terminated_length": 1127.6923828125, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.19929982495623905, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0813248139218117, + "kl": 0.00626373291015625, + "learning_rate": 9.73381707584193e-07, + "loss": -0.0906, + "num_tokens": 35842631.0, + "reward": 0.0, + "reward_std": 0.7612572908401489, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1134279919162518, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2017681617436829, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1176.5, + "completions/mean_terminated_length": 1154.933349609375, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.19954988747186797, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.495750480750484, + "kl": 0.00690460205078125, + "learning_rate": 9.732484517817207e-07, + "loss": 0.0149, + "num_tokens": 35886815.0, + "reward": 0.0, + "reward_std": 0.8409204483032227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021775580067785294, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03360903207207181, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1077.625, + "completions/mean_terminated_length": 1077.625, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.19979994998749687, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.147725262723311, + "kl": 0.00563812255859375, + "learning_rate": 9.731148734779982e-07, + "loss": -0.0125, + "num_tokens": 35932177.0, + "reward": 0.0, + "reward_std": 0.566481351852417, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028325774073501998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11805857117447109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1215.25, + "completions/mean_terminated_length": 1149.5384521484375, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.2000500125031258, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.57006993755213, + "kl": 0.003559112548828125, + "learning_rate": 9.729809727748076e-07, + "loss": -0.0303, + "num_tokens": 35965317.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0602483749389648, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03201629611498782, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10361174350206746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1125.0, + "completions/max_terminated_length": 1125.0, + "completions/mean_length": 837.4375, + "completions/mean_terminated_length": 837.4375, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.2003000750187547, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.499021223684371, + "kl": 0.00711822509765625, + "learning_rate": 9.72846749774177e-07, + "loss": 0.0142, + "num_tokens": 35993636.0, + "reward": 0.0, + "reward_std": 1.0001115798950195, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046392646552409406, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1252872581153282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1262.5, + "completions/mean_terminated_length": 1183.3333740234375, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.20055013753438358, + "frac_reward_zero_std": 0.5, + "grad_norm": 2.631770047684844, + "kl": 0.0047454833984375, + "learning_rate": 9.727122045783793e-07, + "loss": 0.0254, + "num_tokens": 36042540.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.737403929233551, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06039984196366317, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09788376867407403, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1034.1875, + "completions/mean_terminated_length": 1034.1875, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.2008002000500125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0431816585920495, + "kl": 0.00656890869140625, + "learning_rate": 9.725773372899342e-07, + "loss": 0.0343, + "num_tokens": 36079159.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.029931902885437, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021964842509053076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047607807133398905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1376388188137505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1110.0, + "completions/max_terminated_length": 1110.0, + "completions/mean_length": 790.6875, + "completions/mean_terminated_length": 790.6875, + "completions/min_length": 497.0, + "completions/min_terminated_length": 497.0, + "epoch": 0.2010502625656414, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7273896143789815, + "kl": 0.0040130615234375, + "learning_rate": 9.724421480116056e-07, + "loss": -0.0744, + "num_tokens": 36107722.0, + "reward": 0.0, + "reward_std": 0.9945021271705627, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14700930423000522, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13705879365317356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1435.3125, + "completions/mean_terminated_length": 1370.625, + "completions/min_length": 1258.0, + "completions/min_terminated_length": 1258.0, + "epoch": 0.20130032508127033, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7299710271826463, + "kl": 0.0054473876953125, + "learning_rate": 9.723066368464033e-07, + "loss": 0.0018, + "num_tokens": 36159799.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9677935838699341, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07074827908145391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.085834071668334, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1094.375, + "completions/mean_terminated_length": 1094.375, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.20155038759689922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.588299107553388, + "kl": 0.00783538818359375, + "learning_rate": 9.721708038975824e-07, + "loss": -0.0451, + "num_tokens": 36204797.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0461629629135132, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01696854695416679, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042401682182747154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 928.25, + "completions/mean_terminated_length": 928.25, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.20180045011252815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.895568966636918, + "kl": 0.00799560546875, + "learning_rate": 9.720346492686435e-07, + "loss": 0.008, + "num_tokens": 36257873.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8200091123580933, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07315521770913438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08972347141646721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534189, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1107.375, + "completions/mean_terminated_length": 1107.375, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.20205051262815704, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8682152862249537, + "kl": 0.004787445068359375, + "learning_rate": 9.71898173063331e-07, + "loss": -0.0098, + "num_tokens": 36298575.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9515619277954102, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019947749476174743, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06643673549064702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1211.75, + "completions/mean_terminated_length": 1192.533447265625, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.20230057514378594, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2064293741583754, + "kl": 0.00734710693359375, + "learning_rate": 9.717613753856358e-07, + "loss": -0.0386, + "num_tokens": 36341427.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0321815013885498, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03913068694518117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055807219370195, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1018.125, + "completions/mean_terminated_length": 906.923095703125, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.20255063765941486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.372234196247926, + "kl": 0.006011962890625, + "learning_rate": 9.716242563397932e-07, + "loss": 0.0155, + "num_tokens": 36382453.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.969944179058075, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040330599757049924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06333195616422584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1301.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1082.5625, + "completions/mean_terminated_length": 1082.5625, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.20280070017504376, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.037470964250877, + "kl": 0.0033397674560546875, + "learning_rate": 9.714868160302833e-07, + "loss": -0.0467, + "num_tokens": 36420318.0, + "reward": 0.0, + "reward_std": 0.7863451242446899, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026355244368384045, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10763046266730364, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 987.0, + "completions/max_terminated_length": 987.0, + "completions/mean_length": 887.375, + "completions/mean_terminated_length": 887.375, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.20305076269067268, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8362447028519653, + "kl": 0.001567840576171875, + "learning_rate": 9.713490545618308e-07, + "loss": -0.0533, + "num_tokens": 36448700.0, + "reward": 0.0, + "reward_std": 0.6109441518783569, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07213574648743228, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11943334552370505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1235.75, + "completions/mean_terminated_length": 1235.75, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.20330082520630158, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2024208018075346, + "kl": 0.00445556640625, + "learning_rate": 9.712109720394058e-07, + "loss": 0.0189, + "num_tokens": 36493480.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9513082504272461, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0034959828212849913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024972689040486388, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1163.75, + "completions/mean_terminated_length": 1141.3333740234375, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.20355088772193047, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.95043320411992, + "kl": 0.0067291259765625, + "learning_rate": 9.71072568568222e-07, + "loss": -0.0627, + "num_tokens": 36535060.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0515539646148682, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0019391966035610005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08044661878761385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1064.125, + "completions/mean_terminated_length": 1064.125, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.2038009502375594, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1647405216222197, + "kl": 0.004210472106933594, + "learning_rate": 9.70933844253739e-07, + "loss": 0.0398, + "num_tokens": 36584134.0, + "reward": 0.0, + "reward_std": 0.8177539110183716, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018976735250821296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03755020280199971, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1113.75, + "completions/mean_terminated_length": 1113.75, + "completions/min_length": 589.0, + "completions/min_terminated_length": 589.0, + "epoch": 0.2040510127531883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.523573375431838, + "kl": 0.00615692138671875, + "learning_rate": 9.707947992016592e-07, + "loss": -0.0418, + "num_tokens": 36621802.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0244393348693848, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030904549669822527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0688480785849818, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 996.6875, + "completions/mean_terminated_length": 924.7857666015625, + "completions/min_length": 464.0, + "completions/min_terminated_length": 464.0, + "epoch": 0.20430107526881722, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0853309270371114, + "kl": 0.00624847412109375, + "learning_rate": 9.706554335179313e-07, + "loss": -0.0489, + "num_tokens": 36661365.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6160600781440735, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020997710749757606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04486117640618913, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1232.3125, + "completions/mean_terminated_length": 1232.3125, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.2045511377844461, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4130128454583915, + "kl": 0.00756072998046875, + "learning_rate": 9.705157473087466e-07, + "loss": -0.0178, + "num_tokens": 36708594.0, + "reward": 0.0, + "reward_std": 1.022857666015625, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0207739390679091, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09316866594138802, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16903867626692443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1074.75, + "completions/mean_terminated_length": 976.6154174804688, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.204801200300075, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9800050348102056, + "kl": 0.0077362060546875, + "learning_rate": 9.703757406805416e-07, + "loss": -0.0452, + "num_tokens": 36754654.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0654590129852295, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016383641741329044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05454776355796487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16599866130651644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1249.8125, + "completions/mean_terminated_length": 1233.1334228515625, + "completions/min_length": 1038.0, + "completions/min_terminated_length": 1038.0, + "epoch": 0.20505126281570393, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.033717435259346, + "kl": 0.0024518966674804688, + "learning_rate": 9.702354137399966e-07, + "loss": 0.0331, + "num_tokens": 36799131.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0123412609100342, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03359285624323855, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05088582274036554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042253, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1173.125, + "completions/mean_terminated_length": 1151.3333740234375, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.20530132533133283, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5143344600425195, + "kl": 0.00817108154296875, + "learning_rate": 9.700947665940364e-07, + "loss": -0.0311, + "num_tokens": 36852597.0, + "reward": 0.0, + "reward_std": 0.7838374376296997, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08929634387717016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05356541681664587, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1179.4375, + "completions/mean_terminated_length": 1158.0667724609375, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.20555138784696175, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8932532783861418, + "kl": 0.00586700439453125, + "learning_rate": 9.69953799349829e-07, + "loss": -0.0103, + "num_tokens": 36895196.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.016849398612976, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07599880740793896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1611245143074282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1207.625, + "completions/mean_terminated_length": 1188.1334228515625, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.20580145036259065, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.61116816456, + "kl": 0.0087127685546875, + "learning_rate": 9.69812512114787e-07, + "loss": -0.0284, + "num_tokens": 36948270.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9074162840843201, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012205342159827617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06984098371525183, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 883.0, + "completions/max_terminated_length": 883.0, + "completions/mean_length": 736.0, + "completions/mean_terminated_length": 736.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.20605151287821954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.084015316345504, + "kl": 0.0056133270263671875, + "learning_rate": 9.696709049965664e-07, + "loss": -0.0372, + "num_tokens": 36975246.0, + "reward": 0.0, + "reward_std": 0.8228803873062134, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07330030837344016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1386370906228244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1118.3125, + "completions/mean_terminated_length": 821.4444580078125, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.20630157539384847, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.846984876159989, + "kl": 0.004360198974609375, + "learning_rate": 9.695289781030673e-07, + "loss": 0.0353, + "num_tokens": 37017699.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.001867651939392, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16049061368895054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2411181113222849, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1421.625, + "completions/mean_terminated_length": 1320.857177734375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.20655163790947736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0754862274473354, + "kl": 0.00730133056640625, + "learning_rate": 9.693867315424332e-07, + "loss": -0.0102, + "num_tokens": 37077093.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9792826771736145, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05345112718375151, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.033010163197356224, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176771, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1209.1875, + "completions/mean_terminated_length": 1167.6429443359375, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.2068017004251063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.234398212224772, + "kl": 0.00681304931640625, + "learning_rate": 9.692441654230512e-07, + "loss": -0.0077, + "num_tokens": 37128176.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9349316358566284, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06930569252624758, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1773081457222778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1368.5, + "completions/mean_terminated_length": 1237.0, + "completions/min_length": 1089.0, + "completions/min_terminated_length": 1089.0, + "epoch": 0.20705176294073518, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.061251061526901, + "kl": 0.00628662109375, + "learning_rate": 9.691012798535522e-07, + "loss": -0.0159, + "num_tokens": 37189072.0, + "reward": 3.725290298461914e-08, + "reward_std": 0.9954110383987427, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008208965467083225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01654414899019048, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457552, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 901.9375, + "completions/mean_terminated_length": 901.9375, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.20730182545636408, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7309068748047745, + "kl": 0.0043087005615234375, + "learning_rate": 9.6895807494281e-07, + "loss": 0.068, + "num_tokens": 37217775.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.710112452507019, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01727523672040227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04331786175016389, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1327.75, + "completions/mean_terminated_length": 1155.5, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.207551887971993, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.527797846670965, + "kl": 0.005825042724609375, + "learning_rate": 9.68814550799942e-07, + "loss": 0.0299, + "num_tokens": 37273499.0, + "reward": 0.0, + "reward_std": 0.8290033340454102, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03611846471558439, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04734953644768672, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0806225774829855, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1366.6875, + "completions/mean_terminated_length": 1263.0, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.2078019504876219, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4064938989047913, + "kl": 0.00476837158203125, + "learning_rate": 9.686707075343089e-07, + "loss": -0.0523, + "num_tokens": 37332398.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9927560091018677, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02299708792842277, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06178877077745778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1398.1875, + "completions/mean_terminated_length": 1374.6923828125, + "completions/min_length": 1185.0, + "completions/min_terminated_length": 1185.0, + "epoch": 0.20805201300325082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.595427484675683, + "kl": 0.00496673583984375, + "learning_rate": 9.685265452555144e-07, + "loss": -0.0191, + "num_tokens": 37382593.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8175119161605835, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09529760985520745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07579685439290722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 909.5625, + "completions/mean_terminated_length": 870.2000732421875, + "completions/min_length": 557.0, + "completions/min_terminated_length": 557.0, + "epoch": 0.20830207551887972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4602678612459257, + "kl": 0.007537841796875, + "learning_rate": 9.683820640734054e-07, + "loss": 0.0121, + "num_tokens": 37423426.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9712800979614258, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02701099134668036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0536110180146249, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1187.375, + "completions/mean_terminated_length": 1166.533447265625, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.20855213803450862, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1971671030276623, + "kl": 0.00673675537109375, + "learning_rate": 9.682372640980718e-07, + "loss": -0.005, + "num_tokens": 37468992.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8848727941513062, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00197017072291503, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0810788021164857, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1215.75, + "completions/mean_terminated_length": 1215.75, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.20880220055013754, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8266270526107946, + "kl": 0.006134033203125, + "learning_rate": 9.680921454398463e-07, + "loss": 0.0012, + "num_tokens": 37510908.0, + "reward": 0.0, + "reward_std": 0.839897632598877, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12491144750548207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08140330975365517, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1211.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 965.75, + "completions/mean_terminated_length": 965.75, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.20905226306576644, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.469060387341573, + "kl": 0.00850677490234375, + "learning_rate": 9.679467082093045e-07, + "loss": 0.0152, + "num_tokens": 37560752.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9950760006904602, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.049286134096563274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08902430470255925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1205.625, + "completions/mean_terminated_length": 1186.0001220703125, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.20930232558139536, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9471428239213875, + "kl": 0.005889892578125, + "learning_rate": 9.67800952517265e-07, + "loss": -0.047, + "num_tokens": 37608826.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9095619916915894, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06774073466249726, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08004599009013184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1286.25, + "completions/mean_terminated_length": 1215.0, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.20955238809702426, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1801262823612984, + "kl": 0.00655364990234375, + "learning_rate": 9.676548784747885e-07, + "loss": -0.0071, + "num_tokens": 37662966.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7618764042854309, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03979807340728452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0669560730768849, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14401645996461912, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1266.8125, + "completions/mean_terminated_length": 967.0000610351562, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.20980245061265315, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6375657604934193, + "kl": 0.00556182861328125, + "learning_rate": 9.675084861931785e-07, + "loss": -0.0562, + "num_tokens": 37707187.0, + "reward": 0.0, + "reward_std": 0.541621208190918, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.34166197674804105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.35152786917676926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1473.4375, + "completions/mean_terminated_length": 1358.3333740234375, + "completions/min_length": 1237.0, + "completions/min_terminated_length": 1237.0, + "epoch": 0.21005251312828208, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2890175609546155, + "kl": 0.0040435791015625, + "learning_rate": 9.673617757839813e-07, + "loss": 0.0001, + "num_tokens": 37768754.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0589768886566162, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03563224533990897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06895803739426108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1022.25, + "completions/mean_terminated_length": 1022.25, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.21030257564391097, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9914905428733065, + "kl": 0.0046539306640625, + "learning_rate": 9.672147473589851e-07, + "loss": -0.008, + "num_tokens": 37804798.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.35099777579307556, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006066937307574246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16147130856645694, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1136.0, + "completions/max_terminated_length": 1136.0, + "completions/mean_length": 949.375, + "completions/mean_terminated_length": 949.375, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.2105526381595399, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3103017638868755, + "kl": 0.005146026611328125, + "learning_rate": 9.67067401030221e-07, + "loss": -0.0819, + "num_tokens": 37841588.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.05172598361969, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020903144276536348, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07059961680011348, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05288001793018131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1246.1875, + "completions/mean_terminated_length": 1209.9285888671875, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.2108027006751688, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.973805792704466, + "kl": 0.00682830810546875, + "learning_rate": 9.669197369099616e-07, + "loss": 0.0743, + "num_tokens": 37899455.0, + "reward": 0.0, + "reward_std": 0.846670389175415, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013416095201799049, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06471297576030649, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1102.0, + "completions/mean_terminated_length": 969.3333740234375, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.2110527631907977, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.273614846566732, + "kl": 0.00634765625, + "learning_rate": 9.667717551107224e-07, + "loss": -0.0111, + "num_tokens": 37953415.0, + "reward": 0.0, + "reward_std": 0.8003566265106201, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041398608430826704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05492995640364453, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15910630036178586, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1001.0625, + "completions/mean_terminated_length": 701.7000122070312, + "completions/min_length": 563.0, + "completions/min_terminated_length": 563.0, + "epoch": 0.2113028257064266, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.641766614740499, + "kl": 0.004962921142578125, + "learning_rate": 9.666234557452603e-07, + "loss": 0.0336, + "num_tokens": 37999984.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.8623543977737427, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10548020289969975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15210518977412862, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05374838498865702, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 915.0, + "completions/mean_length": 1005.5, + "completions/mean_terminated_length": 708.7999877929688, + "completions/min_length": 307.0, + "completions/min_terminated_length": 307.0, + "epoch": 0.2115528882220555, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.435583409622942, + "kl": 0.0062103271484375, + "learning_rate": 9.664748389265748e-07, + "loss": -0.1856, + "num_tokens": 38049520.0, + "reward": 0.0, + "reward_std": 0.7817102670669556, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19957044607263227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1687216466328169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 985.875, + "completions/mean_terminated_length": 985.875, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.21180295073768443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8317204919773697, + "kl": 0.003673553466796875, + "learning_rate": 9.663259047679069e-07, + "loss": -0.0208, + "num_tokens": 38081054.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.994848370552063, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03447289599635452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06934563230022972, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 849.375, + "completions/mean_terminated_length": 849.375, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.21205301325331333, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.130485336947011, + "kl": 0.009765625, + "learning_rate": 9.661766533827391e-07, + "loss": 0.0441, + "num_tokens": 38109828.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5642310976982117, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -5.630091980721305e-05, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028223527712088144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1222.125, + "completions/mean_terminated_length": 1222.125, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.21230307576894222, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.66409854927175, + "kl": 0.004413604736328125, + "learning_rate": 9.660270848847966e-07, + "loss": 0.019, + "num_tokens": 38165534.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7799407243728638, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017166492576211882, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10361949235218265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1253.5, + "completions/mean_terminated_length": 1171.3333740234375, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.21255313828457115, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.225861883493599, + "kl": 0.013153076171875, + "learning_rate": 9.658771993880452e-07, + "loss": -0.0632, + "num_tokens": 38218750.0, + "reward": 0.0, + "reward_std": 0.5657997727394104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11601315864751295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24750933174510661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1060.5, + "completions/mean_terminated_length": 1060.5, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.21280320080020004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2870157458794766, + "kl": 0.007904052734375, + "learning_rate": 9.657269970066927e-07, + "loss": -0.0006, + "num_tokens": 38247886.0, + "reward": 0.0, + "reward_std": 0.9735749959945679, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2019007196324523, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2881702127824437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1520233900132184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 854.625, + "completions/mean_terminated_length": 854.625, + "completions/min_length": 563.0, + "completions/min_terminated_length": 563.0, + "epoch": 0.21305326331582897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7026678477915964, + "kl": 0.00691986083984375, + "learning_rate": 9.655764778551886e-07, + "loss": -0.0637, + "num_tokens": 38273904.0, + "reward": 0.0, + "reward_std": 0.6883244514465332, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13938879483541664, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09752563402336624, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1069.0, + "completions/max_terminated_length": 1069.0, + "completions/mean_length": 925.375, + "completions/mean_terminated_length": 925.375, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.21330332583145786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.588501039418753, + "kl": 0.0074005126953125, + "learning_rate": 9.654256420482234e-07, + "loss": 0.0011, + "num_tokens": 38299710.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9437212944030762, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019126808803808577, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11721997211521222, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1294.0625, + "completions/mean_terminated_length": 1088.125, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.21355338834708676, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.751248326636049, + "kl": 0.00566864013671875, + "learning_rate": 9.652744897007288e-07, + "loss": -0.0377, + "num_tokens": 38350015.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9732564091682434, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04443539599621927, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06797125992978366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16594287281181147, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1086.5625, + "completions/mean_terminated_length": 898.6364135742188, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.21380345086271568, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.486189101597367, + "kl": 0.004421234130859375, + "learning_rate": 9.65123020927878e-07, + "loss": -0.08, + "num_tokens": 38392528.0, + "reward": 0.0, + "reward_std": 0.881582498550415, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0006967438679255575, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0640830433111657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1009.0, + "completions/mean_length": 861.5625, + "completions/mean_terminated_length": 770.357177734375, + "completions/min_length": 486.0, + "completions/min_terminated_length": 486.0, + "epoch": 0.21405351337834458, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5546916497674936, + "kl": 0.0056304931640625, + "learning_rate": 9.649712358450848e-07, + "loss": -0.1028, + "num_tokens": 38429633.0, + "reward": 0.0, + "reward_std": 1.0480425357818604, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06213574913328838, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09994461761343887, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1137.875, + "completions/mean_terminated_length": 1113.7333984375, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.2143035758939735, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4013588471416827, + "kl": 0.00771331787109375, + "learning_rate": 9.64819134568005e-07, + "loss": -0.022, + "num_tokens": 38472311.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.959078311920166, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024769612746454832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052359934454826826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1145.75, + "completions/mean_terminated_length": 1095.1429443359375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.2145536384096024, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2443053559362047, + "kl": 0.00659942626953125, + "learning_rate": 9.646667172125342e-07, + "loss": -0.0191, + "num_tokens": 38512971.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0369646549224854, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0905827503821685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0980986639667052, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353542, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1108.5625, + "completions/mean_terminated_length": 1108.5625, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.2148037009252313, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6704117227003485, + "kl": 0.0085906982421875, + "learning_rate": 9.645139838948097e-07, + "loss": -0.0306, + "num_tokens": 38561508.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6647017002105713, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028133885513726656, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1897019279285739, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1185.125, + "completions/mean_terminated_length": 996.2000122070312, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.21505376344086022, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.096443367253993, + "kl": 0.0062713623046875, + "learning_rate": 9.643609347312092e-07, + "loss": 0.0032, + "num_tokens": 38606510.0, + "reward": 0.0, + "reward_std": 0.7319425344467163, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20010388006815666, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10595044168326571, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1022.4375, + "completions/mean_terminated_length": 1022.4375, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.21530382595648911, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7342568184689178, + "kl": 0.0075225830078125, + "learning_rate": 9.64207569838351e-07, + "loss": -0.0044, + "num_tokens": 38655733.0, + "reward": 0.0, + "reward_std": 0.7568542957305908, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01985156461954929, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02598081472420531, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1010133837850396, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1415.0625, + "completions/mean_terminated_length": 1273.5, + "completions/min_length": 1178.0, + "completions/min_terminated_length": 1178.0, + "epoch": 0.21555388847211804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8489398056228534, + "kl": 0.00629425048828125, + "learning_rate": 9.640538893330935e-07, + "loss": -0.0003, + "num_tokens": 38717502.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0135189294815063, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08053928931320364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11339715827437477, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 955.3125, + "completions/mean_terminated_length": 955.3125, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.21580395098774693, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8660081058831253, + "kl": 0.004329681396484375, + "learning_rate": 9.638998933325372e-07, + "loss": 0.0025, + "num_tokens": 38763235.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9226922988891602, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034065242061974484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09673172295851047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1063.375, + "completions/mean_terminated_length": 962.6154174804688, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.21605401350337583, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9343333425953793, + "kl": 0.0059356689453125, + "learning_rate": 9.637455819540213e-07, + "loss": -0.0107, + "num_tokens": 38806977.0, + "reward": 0.0, + "reward_std": 0.7031272649765015, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046742611360596265, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05236424586113956, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05163977794943227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1375.375, + "completions/mean_terminated_length": 1215.1429443359375, + "completions/min_length": 1104.0, + "completions/min_terminated_length": 1104.0, + "epoch": 0.21630407601900475, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.885867759958327, + "kl": 0.01123046875, + "learning_rate": 9.635909553151258e-07, + "loss": 0.0109, + "num_tokens": 38873847.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6492425203323364, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025743325615288207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09596540501159566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15752718754175363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1342.1875, + "completions/mean_terminated_length": 1319.6429443359375, + "completions/min_length": 1092.0, + "completions/min_terminated_length": 1092.0, + "epoch": 0.21655413853463365, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1651383083159037, + "kl": 0.003101348876953125, + "learning_rate": 9.634360135336712e-07, + "loss": -0.0581, + "num_tokens": 38920690.0, + "reward": 0.0, + "reward_std": 0.8517165184020996, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10547106272706697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11882223330523957, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1445.4375, + "completions/mean_terminated_length": 1390.875, + "completions/min_length": 1243.0, + "completions/min_terminated_length": 1243.0, + "epoch": 0.21680420105026257, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9732666133869303, + "kl": 0.00673675537109375, + "learning_rate": 9.632807567277182e-07, + "loss": 0.0036, + "num_tokens": 38974345.0, + "reward": 0.0, + "reward_std": 0.41026201844215393, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0183279610222858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20119124655699616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1148.75, + "completions/mean_terminated_length": 1125.3333740234375, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.21705426356589147, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9127336894604827, + "kl": 0.00621795654296875, + "learning_rate": 9.631251850155675e-07, + "loss": 0.0057, + "num_tokens": 39022301.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9425694346427917, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028998751953319114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01832207116324727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1026.375, + "completions/mean_terminated_length": 1026.375, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.2173043260815204, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.66589149967265, + "kl": 0.0087432861328125, + "learning_rate": 9.62969298515759e-07, + "loss": 0.0164, + "num_tokens": 39067419.0, + "reward": 0.0, + "reward_std": 0.5713178515434265, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0381018829863265, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04468389614888739, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1280.625, + "completions/mean_terminated_length": 1180.9091796875, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.2175543885971493, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6463549833797617, + "kl": 0.007110595703125, + "learning_rate": 9.628130973470731e-07, + "loss": 0.0235, + "num_tokens": 39116589.0, + "reward": 0.0, + "reward_std": 0.8402132987976074, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04551876581883977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03346839960167545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1053.375, + "completions/mean_terminated_length": 950.3077392578125, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.21780445111277819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.688523491941024, + "kl": 0.00801849365234375, + "learning_rate": 9.626565816285303e-07, + "loss": -0.019, + "num_tokens": 39163259.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0553568601608276, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05898841723431295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10614765609965422, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1149.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 942.9375, + "completions/mean_terminated_length": 942.9375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.2180545136284071, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5215816800221886, + "kl": 0.00841522216796875, + "learning_rate": 9.624997514793903e-07, + "loss": -0.0039, + "num_tokens": 39196778.0, + "reward": 0.0, + "reward_std": 0.5148476362228394, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006545168249385942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.018423843961561444, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1256.9375, + "completions/mean_terminated_length": 1111.0999755859375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.218304576144036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.78160358300042, + "kl": 0.0068511962890625, + "learning_rate": 9.62342607019152e-07, + "loss": -0.0699, + "num_tokens": 39249145.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0273361206054688, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0268125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06918860093975018, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818892, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1465.5, + "completions/mean_terminated_length": 1389.5999755859375, + "completions/min_length": 1149.0, + "completions/min_terminated_length": 1149.0, + "epoch": 0.21855463865966493, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.107211312964834, + "kl": 0.00704193115234375, + "learning_rate": 9.62185148367555e-07, + "loss": -0.0093, + "num_tokens": 39305249.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8954590559005737, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003708914451503259, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05816981168296909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1142.5625, + "completions/mean_terminated_length": 1142.5625, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.21880470117529383, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.2270920887802947, + "kl": 0.0022182464599609375, + "learning_rate": 9.620273756445768e-07, + "loss": 0.0366, + "num_tokens": 39338978.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9181548357009888, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01814596698738806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0332182436999318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1014.4375, + "completions/mean_terminated_length": 1014.4375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.21905476369092272, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1104489553877426, + "kl": 0.00823211669921875, + "learning_rate": 9.618692889704353e-07, + "loss": -0.0262, + "num_tokens": 39390473.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.3751347064971924, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03522863069104046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11334297531088482, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1201.0, + "completions/mean_terminated_length": 1132.0, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.21930482620655165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3199233407109228, + "kl": 0.0064239501953125, + "learning_rate": 9.617108884655875e-07, + "loss": 0.0465, + "num_tokens": 39433353.0, + "reward": 0.0, + "reward_std": 0.89996337890625, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.001643845034500424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05815012840885205, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 1129.4375, + "completions/mean_terminated_length": 1129.4375, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.21955488872218054, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.348480238431781, + "kl": 0.003528594970703125, + "learning_rate": 9.61552174250729e-07, + "loss": -0.0179, + "num_tokens": 39484464.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0224988460540771, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031457916613106464, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07017491502431722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1064.5625, + "completions/mean_terminated_length": 1064.5625, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.21980495123780946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9974728241278767, + "kl": 0.0132904052734375, + "learning_rate": 9.61393146446795e-07, + "loss": -0.0305, + "num_tokens": 39527057.0, + "reward": 0.0, + "reward_std": 0.7245502471923828, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02741274098452869, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06807306862721274, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1199.8125, + "completions/mean_terminated_length": 1099.75, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.22005501375343836, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6469310204207095, + "kl": 0.005462646484375, + "learning_rate": 9.61233805174959e-07, + "loss": -0.0012, + "num_tokens": 39574854.0, + "reward": 0.0, + "reward_std": 0.7754224538803101, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08570501640200527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09358983895043556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 917.125, + "completions/mean_terminated_length": 878.2667236328125, + "completions/min_length": 474.0, + "completions/min_terminated_length": 474.0, + "epoch": 0.22030507626906726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.603577674309972, + "kl": 0.006622314453125, + "learning_rate": 9.610741505566343e-07, + "loss": 0.0311, + "num_tokens": 39625392.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0609140396118164, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008110158571747399, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.084847549301419, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1369.625, + "completions/mean_terminated_length": 1339.5384521484375, + "completions/min_length": 1099.0, + "completions/min_terminated_length": 1099.0, + "epoch": 0.22055513878469618, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.727002160316296, + "kl": 0.00597381591796875, + "learning_rate": 9.609141827134721e-07, + "loss": -0.0213, + "num_tokens": 39680778.0, + "reward": 0.0, + "reward_std": 0.6313326954841614, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08857740170532775, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10525153323668038, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1191.75, + "completions/mean_terminated_length": 1147.71435546875, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.22080520130032508, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2443599109477588, + "kl": 0.00701141357421875, + "learning_rate": 9.607539017673624e-07, + "loss": 0.03, + "num_tokens": 39725662.0, + "reward": 0.0, + "reward_std": 0.9558100700378418, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004029434203131576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08973324899999445, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1359.125, + "completions/mean_terminated_length": 1274.5999755859375, + "completions/min_length": 934.0, + "completions/min_terminated_length": 934.0, + "epoch": 0.221055263815954, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8470289761323615, + "kl": 0.005847930908203125, + "learning_rate": 9.605933078404343e-07, + "loss": 0.031, + "num_tokens": 39774208.0, + "reward": 0.0, + "reward_std": 0.6099818348884583, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035930604986996934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09769441023091383, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1160.0, + "completions/max_terminated_length": 1160.0, + "completions/mean_length": 900.375, + "completions/mean_terminated_length": 900.375, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.2213053263315829, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.572453315138521, + "kl": 0.0038814544677734375, + "learning_rate": 9.60432401055055e-07, + "loss": -0.048, + "num_tokens": 39810918.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.723963737487793, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034499383994947445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02977482193870789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19148542155126763, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 989.9375, + "completions/mean_terminated_length": 989.9375, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.2215553888472118, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.460534806535485, + "kl": 0.0070037841796875, + "learning_rate": 9.6027118153383e-07, + "loss": 0.046, + "num_tokens": 39843693.0, + "reward": 0.0, + "reward_std": 0.8683041930198669, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07968197359827174, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17786255081998167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1007.3125, + "completions/mean_terminated_length": 1007.3125, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.22180545136284072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.272585000338859, + "kl": 0.00647735595703125, + "learning_rate": 9.601096493996031e-07, + "loss": -0.0751, + "num_tokens": 39884498.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9940682053565979, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04911110030170719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08998582434859904, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1281.3125, + "completions/mean_terminated_length": 1250.071533203125, + "completions/min_length": 1095.0, + "completions/min_terminated_length": 1095.0, + "epoch": 0.2220555138784696, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.919502305735604, + "kl": 0.00739288330078125, + "learning_rate": 9.599478047754566e-07, + "loss": -0.0245, + "num_tokens": 39939575.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8649775385856628, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007009309932475835, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12920886081935035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18297945644656802, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1086.0, + "completions/mean_length": 1125.5, + "completions/mean_terminated_length": 834.2222290039062, + "completions/min_length": 531.0, + "completions/min_terminated_length": 531.0, + "epoch": 0.22230557639409854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.740399640249582, + "kl": 0.00557708740234375, + "learning_rate": 9.597856477847108e-07, + "loss": -0.1043, + "num_tokens": 39980727.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9452449083328247, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.001837164971156064, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027030504896537993, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1605545943838973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1123.0, + "completions/mean_terminated_length": 997.3333740234375, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.22255563890972743, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2166961912961196, + "kl": 0.007171630859375, + "learning_rate": 9.596231785509239e-07, + "loss": -0.0087, + "num_tokens": 40016567.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7447810173034668, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10288451228467799, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.5798936831280624, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1301.25, + "completions/mean_terminated_length": 1235.0, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.22280570142535633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.591383294426292, + "kl": 0.00565338134765625, + "learning_rate": 9.594603971978918e-07, + "loss": -0.0398, + "num_tokens": 40072963.0, + "reward": 0.0, + "reward_std": 0.9468247294425964, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0009259485733523806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1796899819891803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1349.5, + "completions/mean_terminated_length": 1299.3333740234375, + "completions/min_length": 1130.0, + "completions/min_terminated_length": 1130.0, + "epoch": 0.22305576394098525, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.782777154685683, + "kl": 0.00543212890625, + "learning_rate": 9.592973038496487e-07, + "loss": -0.0171, + "num_tokens": 40130035.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9432533979415894, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.105266815617245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04427115098035114, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1404.3125, + "completions/mean_terminated_length": 1244.8333740234375, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.22330582645661415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3286604402452897, + "kl": 0.004024505615234375, + "learning_rate": 9.591338986304663e-07, + "loss": -0.0018, + "num_tokens": 40192104.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9956977367401123, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03765318083043316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10940547849782532, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 943.75, + "completions/mean_terminated_length": 906.6666870117188, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.22355588897224307, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9541623110015403, + "kl": 0.0056610107421875, + "learning_rate": 9.589701816648537e-07, + "loss": 0.0008, + "num_tokens": 40218396.0, + "reward": 0.0, + "reward_std": 0.7498390674591064, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002946733414157669, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20002442685773986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1228.8125, + "completions/mean_terminated_length": 1210.7333984375, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.22380595148787197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9264840961528034, + "kl": 0.006683349609375, + "learning_rate": 9.588061530775582e-07, + "loss": 0.0164, + "num_tokens": 40273433.0, + "reward": 0.0, + "reward_std": 0.9839382171630859, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07055020195225965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08959379822231721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14907119849998599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1199.5, + "completions/mean_terminated_length": 1199.5, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.22405601400350086, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7462839376562496, + "kl": 0.00556182861328125, + "learning_rate": 9.586418129935638e-07, + "loss": 0.0132, + "num_tokens": 40318553.0, + "reward": 0.0, + "reward_std": 0.7144777774810791, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036305705500638004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04896992705757124, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 1005.4375, + "completions/mean_terminated_length": 972.4667358398438, + "completions/min_length": 529.0, + "completions/min_terminated_length": 529.0, + "epoch": 0.2243060765191298, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.895002380828872, + "kl": 0.0104522705078125, + "learning_rate": 9.584771615380924e-07, + "loss": -0.0261, + "num_tokens": 40372272.0, + "reward": 0.0, + "reward_std": 0.820244312286377, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06980951417113583, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08159445014514753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14291929864761418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1167.0, + "completions/mean_length": 1112.5625, + "completions/mean_terminated_length": 936.45458984375, + "completions/min_length": 670.0, + "completions/min_terminated_length": 670.0, + "epoch": 0.22455613903475868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1409318561017967, + "kl": 0.0036983489990234375, + "learning_rate": 9.583121988366028e-07, + "loss": 0.0687, + "num_tokens": 40406489.0, + "reward": 0.0, + "reward_std": 0.5906459093093872, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03988880101112365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07433917587177974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1234.3125, + "completions/mean_terminated_length": 1113.5455322265625, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.2248062015503876, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.953550565411987, + "kl": 0.00641632080078125, + "learning_rate": 9.581469250147912e-07, + "loss": 0.049, + "num_tokens": 40462182.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7678444385528564, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039310551029881964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05927176812339545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1205.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 1075.375, + "completions/mean_terminated_length": 1075.375, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.2250562640660165, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.910104752754398, + "kl": 0.00543212890625, + "learning_rate": 9.579813401985907e-07, + "loss": 0.0252, + "num_tokens": 40507124.0, + "reward": 0.0, + "reward_std": 0.9499566555023193, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01170732051470155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12233884237301174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195308, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1237.3125, + "completions/mean_terminated_length": 1199.7857666015625, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.2253063265816454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.425726532110122, + "kl": 0.0043048858642578125, + "learning_rate": 9.578154445141713e-07, + "loss": -0.047, + "num_tokens": 40561993.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9634462594985962, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03339743447954228, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11990082461931846, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1158.0, + "completions/mean_terminated_length": 1135.2000732421875, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.22555638909727432, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8139380502640363, + "kl": 0.0025081634521484375, + "learning_rate": 9.576492380879403e-07, + "loss": -0.0207, + "num_tokens": 40601145.0, + "reward": 0.0, + "reward_std": 1.0177514553070068, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006404591325126581, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22470498401472547, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689055, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 1458.5, + "completions/mean_terminated_length": 1168.0, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.22580645161290322, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4953172098241767, + "kl": 0.004421234130859375, + "learning_rate": 9.574827210465411e-07, + "loss": 0.0059, + "num_tokens": 40658161.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0310232639312744, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1154.875, + "completions/mean_terminated_length": 947.7999877929688, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.22605651412853214, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.132609594711258, + "kl": 0.0120086669921875, + "learning_rate": 9.573158935168543e-07, + "loss": -0.0464, + "num_tokens": 40718943.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9943860769271851, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005516084471505095, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09704699893626702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686702, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1099.3125, + "completions/mean_terminated_length": 1006.84619140625, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.22630657664416104, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.931358463848216, + "kl": 0.0057830810546875, + "learning_rate": 9.57148755625997e-07, + "loss": 0.0301, + "num_tokens": 40765140.0, + "reward": 0.0, + "reward_std": 0.6684819459915161, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18036215339077466, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20120437408251113, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1177.3125, + "completions/mean_terminated_length": 1155.800048828125, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.22655663915978994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.515261879494972, + "kl": 0.005313873291015625, + "learning_rate": 9.56981307501323e-07, + "loss": -0.0768, + "num_tokens": 40808057.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6213929057121277, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.30544619242913695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.43473767635975935, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115676, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1129.0, + "completions/mean_terminated_length": 906.4000244140625, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.22680670167541886, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709005766857857, + "kl": 0.006031036376953125, + "learning_rate": 9.568135492704214e-07, + "loss": -0.0073, + "num_tokens": 40848849.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6981069445610046, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005995718122546446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21967498189555815, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1256.75, + "completions/mean_terminated_length": 1200.615478515625, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.22705676419104776, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6534096565171983, + "kl": 0.00551605224609375, + "learning_rate": 9.566454810611191e-07, + "loss": -0.0417, + "num_tokens": 40896597.0, + "reward": 0.0, + "reward_std": 1.0426738262176514, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038661592172334676, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08776424490393415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1127.0, + "completions/mean_terminated_length": 1102.1334228515625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.22730682670667668, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.264595062822188, + "kl": 0.0062103271484375, + "learning_rate": 9.564771030014784e-07, + "loss": 0.0268, + "num_tokens": 40954669.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9340046048164368, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0001999921157252423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07013261952180272, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1333.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1002.0625, + "completions/mean_terminated_length": 1002.0625, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.22755688922230558, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5414503555412815, + "kl": 0.0033111572265625, + "learning_rate": 9.563084152197976e-07, + "loss": 0.0157, + "num_tokens": 40999182.0, + "reward": 0.0, + "reward_std": 0.8881155252456665, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02769345298774402, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07921616233834544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1336.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 963.3125, + "completions/mean_terminated_length": 963.3125, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.22780695173793447, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.03913259245381, + "kl": 0.00823974609375, + "learning_rate": 9.56139417844611e-07, + "loss": 0.0486, + "num_tokens": 41043819.0, + "reward": 0.0, + "reward_std": 0.7103468775749207, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016057485502779915, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09339914669961917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14907119849998599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1197.25, + "completions/mean_terminated_length": 1197.25, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.2280570142535634, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3657485006140724, + "kl": 0.0078887939453125, + "learning_rate": 9.559701110046895e-07, + "loss": -0.0341, + "num_tokens": 41098911.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0286931991577148, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15049983375270826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06260784376832373, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1205.625, + "completions/mean_terminated_length": 1029.0, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.2283070767691923, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.458202949434998, + "kl": 0.005138397216796875, + "learning_rate": 9.55800494829039e-07, + "loss": 0.0014, + "num_tokens": 41146289.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6577305793762207, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020122318517896993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09666672469566065, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1146.375, + "completions/mean_terminated_length": 1122.800048828125, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.22855713928482121, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.571121922497702, + "kl": 0.00829315185546875, + "learning_rate": 9.556305694469012e-07, + "loss": -0.0063, + "num_tokens": 41190055.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0609935522079468, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03700806290152918, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13824975540204137, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 922.5625, + "completions/mean_terminated_length": 922.5625, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.2288072018004501, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1030521496889767, + "kl": 0.0074615478515625, + "learning_rate": 9.554603349877538e-07, + "loss": 0.0079, + "num_tokens": 41228976.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0688116550445557, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00555193597508062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06067603379241259, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054751, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1006.375, + "completions/mean_terminated_length": 1006.375, + "completions/min_length": 500.0, + "completions/min_terminated_length": 500.0, + "epoch": 0.229057264316079, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1727207793762395, + "kl": 0.0055084228515625, + "learning_rate": 9.552897915813101e-07, + "loss": -0.0914, + "num_tokens": 41258398.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7364643216133118, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03244070134865884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10003673740602055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1214.0, + "completions/mean_terminated_length": 1214.0, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.22930732683170793, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9913854381732348, + "kl": 0.0063323974609375, + "learning_rate": 9.55118939357518e-07, + "loss": 0.0073, + "num_tokens": 41302390.0, + "reward": 0.0, + "reward_std": 0.9091365337371826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01027755344457499, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051372076261242634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1212.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 972.875, + "completions/mean_terminated_length": 972.875, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.22955738934733683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.62283231874674, + "kl": 0.00804901123046875, + "learning_rate": 9.549477784465613e-07, + "loss": 0.0027, + "num_tokens": 41342484.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7713350057601929, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06234742359791335, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10054907510409605, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1158.0, + "completions/max_terminated_length": 1158.0, + "completions/mean_length": 980.5, + "completions/mean_terminated_length": 980.5, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.22980745186296575, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9211952213754366, + "kl": 0.0050048828125, + "learning_rate": 9.547763089788592e-07, + "loss": -0.0154, + "num_tokens": 41382308.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8125698566436768, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03203873268103804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10906939501856831, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 989.625, + "completions/mean_terminated_length": 955.6000366210938, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.23005751437859465, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5353608212761394, + "kl": 0.0079193115234375, + "learning_rate": 9.546045310850654e-07, + "loss": -0.0258, + "num_tokens": 41423374.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7119932174682617, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024976965346095763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04220293162599737, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1239.3125, + "completions/mean_terminated_length": 1221.933349609375, + "completions/min_length": 1038.0, + "completions/min_terminated_length": 1038.0, + "epoch": 0.23030757689422354, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.299901073189787, + "kl": 0.004108428955078125, + "learning_rate": 9.544324448960692e-07, + "loss": -0.0177, + "num_tokens": 41465667.0, + "reward": 0.0, + "reward_std": 0.9662386775016785, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08448339557448738, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07725321770745203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1233.4375, + "completions/mean_terminated_length": 1171.923095703125, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.23055763940985247, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.58288363795885, + "kl": 0.0089263916015625, + "learning_rate": 9.542600505429943e-07, + "loss": -0.0228, + "num_tokens": 41517274.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0514562129974365, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023025156791959253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05776418796013551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1183.0, + "completions/mean_length": 1208.625, + "completions/mean_terminated_length": 917.25, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.23080770192548136, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.692984545999283, + "kl": 0.002193450927734375, + "learning_rate": 9.540873481571997e-07, + "loss": -0.0069, + "num_tokens": 41564956.0, + "reward": 0.0, + "reward_std": 0.9003636240959167, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0678680253655601, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10273346023959566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1056.9375, + "completions/mean_terminated_length": 1056.9375, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.23105776444111029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9248726777001877, + "kl": 0.00728607177734375, + "learning_rate": 9.53914337870279e-07, + "loss": -0.0266, + "num_tokens": 41607835.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0100815296173096, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07274089150405369, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1430368735601867, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1057.625, + "completions/mean_terminated_length": 1057.625, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.23130782695673918, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6180975195496, + "kl": 0.004913330078125, + "learning_rate": 9.5374101981406e-07, + "loss": -0.0058, + "num_tokens": 41654525.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7294851541519165, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05519496569671066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06785788886495633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1192.625, + "completions/mean_terminated_length": 1121.6923828125, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.2315578894723681, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3620470285404576, + "kl": 0.0096282958984375, + "learning_rate": 9.535673941206053e-07, + "loss": 0.0024, + "num_tokens": 41698031.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0271393060684204, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010588957505432837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044157423395389486, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195306, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1120.25, + "completions/mean_terminated_length": 1120.25, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.231807951987997, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9457656211286725, + "kl": 0.0029201507568359375, + "learning_rate": 9.53393460922212e-07, + "loss": 0.0029, + "num_tokens": 41734347.0, + "reward": 0.0, + "reward_std": 0.7099385261535645, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.107688470643453, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1327119571367633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1292.5625, + "completions/mean_terminated_length": 1262.9285888671875, + "completions/min_length": 1075.0, + "completions/min_terminated_length": 1075.0, + "epoch": 0.2320580145036259, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.602433006116639, + "kl": 0.005767822265625, + "learning_rate": 9.532192203514115e-07, + "loss": 0.0109, + "num_tokens": 41777260.0, + "reward": 0.0, + "reward_std": 0.8069620132446289, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0949092766018347, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13367970045501754, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1095.125, + "completions/mean_terminated_length": 1095.125, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.23230807701925482, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.910542191013473, + "kl": 0.0113372802734375, + "learning_rate": 9.530446725409692e-07, + "loss": 0.0015, + "num_tokens": 41826094.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0436738729476929, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021246250651329505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05005019332980835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1241.25, + "completions/mean_terminated_length": 1204.2857666015625, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.23255813953488372, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9179551969204813, + "kl": 0.0069580078125, + "learning_rate": 9.528698176238848e-07, + "loss": -0.0233, + "num_tokens": 41874274.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6551786661148071, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03674993894677117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07499180239658208, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1076.0, + "completions/mean_length": 1031.125, + "completions/mean_terminated_length": 666.4444580078125, + "completions/min_length": 356.0, + "completions/min_terminated_length": 356.0, + "epoch": 0.23280820205051264, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.195372575214075, + "kl": 0.0068206787109375, + "learning_rate": 9.526946557333919e-07, + "loss": 0.0289, + "num_tokens": 41909396.0, + "reward": 0.0, + "reward_std": 0.6490310430526733, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1987178429512529, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27179192191475376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1125.4375, + "completions/mean_terminated_length": 1039.0, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.23305826456614154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.455676542922897, + "kl": 0.00876617431640625, + "learning_rate": 9.52519187002958e-07, + "loss": -0.0571, + "num_tokens": 41963571.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8455194234848022, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058124266108466535, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05307192716941397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1341.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1006.75, + "completions/mean_terminated_length": 1006.75, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.23330832708177043, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3329336487321854, + "kl": 0.0069580078125, + "learning_rate": 9.523434115662844e-07, + "loss": 0.0179, + "num_tokens": 41994271.0, + "reward": 0.0, + "reward_std": 0.705262303352356, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15750602322787158, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1750786498883796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1389.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1106.5, + "completions/mean_terminated_length": 1106.5, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.23355838959739936, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8056877246640943, + "kl": 0.0052642822265625, + "learning_rate": 9.521673295573064e-07, + "loss": -0.0421, + "num_tokens": 42038839.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8492753505706787, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.042864849701357594, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12236457548071396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1440.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1132.5, + "completions/mean_terminated_length": 1132.5, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.23380845211302825, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.672446458244542, + "kl": 0.0057220458984375, + "learning_rate": 9.519909411101923e-07, + "loss": 0.0242, + "num_tokens": 42070703.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.029476523399353, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026296064160324144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028647524384567687, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1304.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 855.125, + "completions/mean_terminated_length": 855.125, + "completions/min_length": 660.0, + "completions/min_terminated_length": 660.0, + "epoch": 0.23405851462865718, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3197563879048997, + "kl": 0.005340576171875, + "learning_rate": 9.518142463593444e-07, + "loss": 0.0394, + "num_tokens": 42108449.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0339194536209106, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017863476881164436, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04148850062773506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.077817450199525, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1040.0, + "completions/max_terminated_length": 1040.0, + "completions/mean_length": 876.6875, + "completions/mean_terminated_length": 876.6875, + "completions/min_length": 588.0, + "completions/min_terminated_length": 588.0, + "epoch": 0.23430857714428607, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8280823636367503, + "kl": 0.004207611083984375, + "learning_rate": 9.516372454393983e-07, + "loss": -0.0318, + "num_tokens": 42140012.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9367499947547913, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08355905146253573, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.012710095893753602, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 918.5, + "completions/mean_terminated_length": 918.5, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.23455863965991497, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5166074786577424, + "kl": 0.0077667236328125, + "learning_rate": 9.514599384852227e-07, + "loss": -0.075, + "num_tokens": 42182884.0, + "reward": 0.0, + "reward_std": 0.8465733528137207, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028355022846885052, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07073431212385506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1190.6875, + "completions/mean_terminated_length": 1119.3077392578125, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.2348087021755439, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.323048863872927, + "kl": 0.00390625, + "learning_rate": 9.512823256319196e-07, + "loss": 0.0096, + "num_tokens": 42223751.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0553052425384521, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022857677025163246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11226703584701862, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1042.375, + "completions/mean_terminated_length": 1011.86669921875, + "completions/min_length": 530.0, + "completions/min_terminated_length": 530.0, + "epoch": 0.2350587646911728, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.489379008383892, + "kl": 0.00817108154296875, + "learning_rate": 9.51104407014824e-07, + "loss": 0.0398, + "num_tokens": 42266949.0, + "reward": 0.0, + "reward_std": 0.7760323286056519, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05584614916529647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04853073429902689, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1091.375, + "completions/mean_terminated_length": 905.6364135742188, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.2353088272068017, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.743739294450297, + "kl": 0.00823211669921875, + "learning_rate": 9.509261827695043e-07, + "loss": 0.0681, + "num_tokens": 42321187.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9885901808738708, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009365247363520079, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1332999518882814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818417, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1109.8125, + "completions/mean_terminated_length": 1109.8125, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.2355588897224306, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.151120777622698, + "kl": 0.007965087890625, + "learning_rate": 9.507476530317611e-07, + "loss": -0.0006, + "num_tokens": 42364096.0, + "reward": 0.0, + "reward_std": 0.9442886114120483, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09391547059435044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15595778529075893, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 962.1875, + "completions/mean_terminated_length": 926.3333740234375, + "completions/min_length": 454.0, + "completions/min_terminated_length": 454.0, + "epoch": 0.2358089522380595, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9331943246741505, + "kl": 0.00707244873046875, + "learning_rate": 9.505688179376283e-07, + "loss": 0.0028, + "num_tokens": 42403899.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.894816517829895, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006987845700594927, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023121917132615723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1186.0, + "completions/max_terminated_length": 1186.0, + "completions/mean_length": 920.625, + "completions/mean_terminated_length": 920.625, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.23605901475368843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.898571058098812, + "kl": 0.00960540771484375, + "learning_rate": 9.503896776233724e-07, + "loss": -0.0323, + "num_tokens": 42436341.0, + "reward": 0.0, + "reward_std": 0.5919628143310547, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0509264155872073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09019309050674307, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 1019.8125, + "completions/mean_terminated_length": 1019.8125, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.23630907726931732, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7843913682696693, + "kl": 0.0055999755859375, + "learning_rate": 9.502102322254921e-07, + "loss": -0.0246, + "num_tokens": 42481538.0, + "reward": 0.0, + "reward_std": 0.9603627920150757, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011442347145761157, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08479848260956252, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1341.4375, + "completions/mean_terminated_length": 1304.84619140625, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.23655913978494625, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.520333854174939, + "kl": 0.00699615478515625, + "learning_rate": 9.500304818807191e-07, + "loss": -0.0234, + "num_tokens": 42538609.0, + "reward": -6.332993507385254e-08, + "reward_std": 1.0640571117401123, + "rewards/wordcountpos_reward_GEOBench/mean": -6.332993507385254e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.060600440544383245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10759435585338044, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16141733350404336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 994.1875, + "completions/mean_terminated_length": 877.4615478515625, + "completions/min_length": 491.0, + "completions/min_terminated_length": 491.0, + "epoch": 0.23680920230057514, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.428697343908821, + "kl": 0.00469207763671875, + "learning_rate": 9.498504267260169e-07, + "loss": 0.0164, + "num_tokens": 42568116.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9839252233505249, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046654477615942685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06821694269231128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16147468555186623, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1185.125, + "completions/mean_terminated_length": 1140.1429443359375, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.23705926481620404, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.176863012583183, + "kl": 0.008270263671875, + "learning_rate": 9.496700668985817e-07, + "loss": -0.0103, + "num_tokens": 42617478.0, + "reward": 0.0, + "reward_std": 1.0007305145263672, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015406199492636441, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09689738236184417, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1157.625, + "completions/mean_terminated_length": 1108.71435546875, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.23730932733183296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.556351563058199, + "kl": 0.00862884521484375, + "learning_rate": 9.494894025358417e-07, + "loss": -0.0182, + "num_tokens": 42661608.0, + "reward": 0.0, + "reward_std": 0.642557680606842, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011588169050226423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04039653735357576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14395215254459456, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1263.0, + "completions/max_terminated_length": 1263.0, + "completions/mean_length": 979.25, + "completions/mean_terminated_length": 979.25, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.23755938984746186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.174471125516864, + "kl": 0.00626373291015625, + "learning_rate": 9.493084337754571e-07, + "loss": -0.0228, + "num_tokens": 42690908.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0576820373535156, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14605934866804432, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 918.625, + "completions/mean_terminated_length": 918.625, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.23780945236309078, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2099467249758513, + "kl": 0.00637054443359375, + "learning_rate": 9.491271607553202e-07, + "loss": -0.0737, + "num_tokens": 42741830.0, + "reward": 0.0, + "reward_std": 0.5855954885482788, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11331929871506097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10074070403712988, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1329.1875, + "completions/mean_terminated_length": 1272.25, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.23805951487871968, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.227566242790243, + "kl": 0.00958251953125, + "learning_rate": 9.489455836135549e-07, + "loss": -0.0283, + "num_tokens": 42795153.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0561772584915161, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006282064616878502, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045200787447816394, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1313.375, + "completions/mean_terminated_length": 1201.4000244140625, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.23830957739434858, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4613494023205913, + "kl": 0.004856109619140625, + "learning_rate": 9.487637024885169e-07, + "loss": -0.033, + "num_tokens": 42847103.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8166074156761169, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06632653726870066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11006614961212595, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1391.0, + "completions/mean_terminated_length": 1282.0, + "completions/min_length": 1152.0, + "completions/min_terminated_length": 1152.0, + "epoch": 0.2385596399099775, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5035538176033727, + "kl": 0.0066986083984375, + "learning_rate": 9.485815175187937e-07, + "loss": 0.0222, + "num_tokens": 42908295.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9402428865432739, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01916201130069423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028695831408947277, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1066.25, + "completions/mean_terminated_length": 1066.25, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.2388097024256064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.525839029577522, + "kl": 0.009185791015625, + "learning_rate": 9.483990288432041e-07, + "loss": -0.0381, + "num_tokens": 42943235.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9105735421180725, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019394698417269665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09937554904810676, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1215.0, + "completions/mean_terminated_length": 1120.0, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.23905976494123532, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.494877817661855, + "kl": 0.00848388671875, + "learning_rate": 9.482162366007985e-07, + "loss": -0.0225, + "num_tokens": 42999043.0, + "reward": 0.0, + "reward_std": 0.8165233135223389, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033477864730945335, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08925843560282601, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1122.75, + "completions/mean_terminated_length": 1097.60009765625, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.23930982745686422, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5716646415036744, + "kl": 0.009765625, + "learning_rate": 9.480331409308581e-07, + "loss": -0.009, + "num_tokens": 43050575.0, + "reward": 0.0, + "reward_std": 0.47468724846839905, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04478812540302747, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053021029971950934, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1112.75, + "completions/mean_terminated_length": 1112.75, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.2395598899724931, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1505663351374746, + "kl": 0.00789642333984375, + "learning_rate": 9.478497419728964e-07, + "loss": 0.0097, + "num_tokens": 43102819.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.905625581741333, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01672091261263595, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06435380609999879, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1398.4375, + "completions/mean_terminated_length": 1296.875, + "completions/min_length": 1070.0, + "completions/min_terminated_length": 1070.0, + "epoch": 0.23980995248812204, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0657560666389627, + "kl": 0.007659912109375, + "learning_rate": 9.476660398666568e-07, + "loss": -0.0152, + "num_tokens": 43161802.0, + "reward": 0.0, + "reward_std": 0.7598558664321899, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034238212843739985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06990515779109208, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1018.4375, + "completions/mean_terminated_length": 986.3333740234375, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.24006001500375093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0005845920889027, + "kl": 0.0065765380859375, + "learning_rate": 9.474820347521144e-07, + "loss": -0.0229, + "num_tokens": 43201737.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8196992874145508, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0387811992032703, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03778669075167926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1060.125, + "completions/mean_terminated_length": 1060.125, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.24031007751937986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9229125126042175, + "kl": 0.011810302734375, + "learning_rate": 9.47297726769475e-07, + "loss": 0.0197, + "num_tokens": 43253315.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7528923749923706, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03389727434036778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10004878062074438, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1196.0, + "completions/max_terminated_length": 1196.0, + "completions/mean_length": 905.625, + "completions/mean_terminated_length": 905.625, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.24056014003500875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0979371682740995, + "kl": 0.00673675537109375, + "learning_rate": 9.471131160591751e-07, + "loss": -0.0238, + "num_tokens": 43294845.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0232510566711426, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1039558219671088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0764567269044325, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.141878925953186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1078.5, + "completions/mean_terminated_length": 1078.5, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.24081020255063765, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5472033691792406, + "kl": 0.00853729248046875, + "learning_rate": 9.469282027618819e-07, + "loss": -0.0205, + "num_tokens": 43353133.0, + "reward": 0.0, + "reward_std": 0.8156346678733826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02929043618761222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20723780610033068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857661, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1255.3125, + "completions/mean_terminated_length": 1065.0, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.24106026506626657, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9801306714040945, + "kl": 0.006587982177734375, + "learning_rate": 9.467429870184931e-07, + "loss": -0.0191, + "num_tokens": 43410450.0, + "reward": 0.0, + "reward_std": 0.9207369089126587, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06893678899253197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1123045046421865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1276.75, + "completions/mean_terminated_length": 1276.75, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.24131032758189547, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.950470558639067, + "kl": 0.00725555419921875, + "learning_rate": 9.465574689701371e-07, + "loss": -0.0217, + "num_tokens": 43465214.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9610758423805237, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019549167063879803, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13250290306755522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195013, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1014.0, + "completions/mean_length": 1027.3125, + "completions/mean_terminated_length": 959.7857666015625, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.2415603900975244, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6162893541661396, + "kl": 0.003711700439453125, + "learning_rate": 9.463716487581725e-07, + "loss": -0.01, + "num_tokens": 43497723.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8026916980743408, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0066595362779744965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0837109576201119, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14700718047466632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1160.9375, + "completions/mean_terminated_length": 1160.9375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.2418104526131533, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0506113883242127, + "kl": 0.00685882568359375, + "learning_rate": 9.461855265241881e-07, + "loss": 0.0045, + "num_tokens": 43541474.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0340982675552368, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002928901647888675, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0355075621395111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1389.1875, + "completions/mean_terminated_length": 1338.8182373046875, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.24206051512878218, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.762842335817606, + "kl": 0.007598876953125, + "learning_rate": 9.459991024100026e-07, + "loss": -0.0009, + "num_tokens": 43593917.0, + "reward": 0.0, + "reward_std": 0.8986847400665283, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0413796524754678, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10785715723320906, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1318.375, + "completions/mean_terminated_length": 1177.111083984375, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.2423105776444111, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7675879797749565, + "kl": 0.00624847412109375, + "learning_rate": 9.458123765576654e-07, + "loss": 0.0034, + "num_tokens": 43650707.0, + "reward": 0.0, + "reward_std": 0.5627799034118652, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027499153916333213, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07054570177207276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1177.5625, + "completions/mean_terminated_length": 1177.5625, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.24256064016004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3619601481159496, + "kl": 0.0090179443359375, + "learning_rate": 9.456253491094552e-07, + "loss": -0.0423, + "num_tokens": 43705956.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0216336250305176, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20844016124895437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11034910835473163, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1203.3125, + "completions/mean_terminated_length": 1183.533447265625, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.24281070267566893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5089116667587126, + "kl": 0.0097198486328125, + "learning_rate": 9.454380202078805e-07, + "loss": -0.0148, + "num_tokens": 43753665.0, + "reward": 0.0, + "reward_std": 0.94871985912323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0235842680317526, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04438745492664978, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1343709624716425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1012.1875, + "completions/mean_terminated_length": 979.666748046875, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.24306076519129782, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3254429726041494, + "kl": 0.01082611083984375, + "learning_rate": 9.452503899956799e-07, + "loss": -0.0548, + "num_tokens": 43795500.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7976430058479309, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08526323040007935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05955873008180233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 981.375, + "completions/mean_terminated_length": 907.2857666015625, + "completions/min_length": 632.0, + "completions/min_terminated_length": 632.0, + "epoch": 0.24331082770692672, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8273616817228353, + "kl": 0.00565338134765625, + "learning_rate": 9.450624586158215e-07, + "loss": -0.001, + "num_tokens": 43850946.0, + "reward": 0.0, + "reward_std": 1.0679044723510742, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04509588947937588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08337380867799697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1080.5625, + "completions/mean_terminated_length": 1052.60009765625, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.24356089022255564, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.225452207474673, + "kl": 0.00661468505859375, + "learning_rate": 9.448742262115026e-07, + "loss": -0.0503, + "num_tokens": 43888595.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9961885213851929, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015950763595562675, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031294711648812666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1079.25, + "completions/mean_terminated_length": 1051.2000732421875, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.24381095273818454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6893639667701255, + "kl": 0.00577545166015625, + "learning_rate": 9.4468569292615e-07, + "loss": 0.0325, + "num_tokens": 43921799.0, + "reward": 0.0, + "reward_std": 0.4111371636390686, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07888501303715732, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21747865646109893, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1227.4375, + "completions/mean_terminated_length": 1227.4375, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.24406101525381346, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8445707572244237, + "kl": 0.00726318359375, + "learning_rate": 9.444968589034198e-07, + "loss": -0.0079, + "num_tokens": 43989150.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6846209168434143, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12178309922074788, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17192829218502564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1088.125, + "completions/mean_terminated_length": 1088.125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.24431107776944236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7352364877654693, + "kl": 0.00620269775390625, + "learning_rate": 9.443077242871974e-07, + "loss": -0.0194, + "num_tokens": 44027624.0, + "reward": 0.0, + "reward_std": 0.9401774406433105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05455755690049991, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09126914408539442, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1254.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 1071.875, + "completions/mean_terminated_length": 1071.875, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.24456114028507125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0992955622994516, + "kl": 0.00865936279296875, + "learning_rate": 9.44118289221597e-07, + "loss": -0.0365, + "num_tokens": 44059350.0, + "reward": 0.0, + "reward_std": 0.7762793898582458, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014582983321155301, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.017171087495063263, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 914.625, + "completions/mean_terminated_length": 914.625, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.24481120280070018, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5443533043562336, + "kl": 0.0074615478515625, + "learning_rate": 9.439285538509617e-07, + "loss": -0.0117, + "num_tokens": 44100512.0, + "reward": 0.0, + "reward_std": 0.8195239305496216, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2400174962784777, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17217027077128605, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1039.3125, + "completions/mean_terminated_length": 1039.3125, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.24506126531632907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.181523977305435, + "kl": 0.009429931640625, + "learning_rate": 9.437385183198637e-07, + "loss": 0.0117, + "num_tokens": 44138661.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0523405075073242, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0748955710967899, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12268005539363772, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0807373427759331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1231.5, + "completions/mean_terminated_length": 1213.60009765625, + "completions/min_length": 1102.0, + "completions/min_terminated_length": 1102.0, + "epoch": 0.245311327831958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.950807216240355, + "kl": 0.00824737548828125, + "learning_rate": 9.435481827731037e-07, + "loss": -0.0298, + "num_tokens": 44194757.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9012545943260193, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20484169629993193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14833579515729364, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 860.375, + "completions/mean_terminated_length": 860.375, + "completions/min_length": 295.0, + "completions/min_terminated_length": 295.0, + "epoch": 0.2455613903475869, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.743646734380061, + "kl": 0.0110321044921875, + "learning_rate": 9.433575473557113e-07, + "loss": -0.0776, + "num_tokens": 44222675.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.522010087966919, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1052922648440185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04111503634452096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147854, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1336.375, + "completions/mean_terminated_length": 1281.8333740234375, + "completions/min_length": 1133.0, + "completions/min_terminated_length": 1133.0, + "epoch": 0.2458114528632158, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8863138167448152, + "kl": 0.0082244873046875, + "learning_rate": 9.431666122129439e-07, + "loss": -0.014, + "num_tokens": 44269001.0, + "reward": 0.0, + "reward_std": 0.6691133975982666, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015702593408954617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03262947360616697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.052880017930181315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1083.0, + "completions/mean_length": 1264.875, + "completions/mean_terminated_length": 1029.75, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.24606151537884471, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.3443187242128518, + "kl": 0.002399444580078125, + "learning_rate": 9.42975377490288e-07, + "loss": -0.0004, + "num_tokens": 44320639.0, + "reward": 0.0, + "reward_std": 0.7293726205825806, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06593544641436204, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2501604754988342, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1399.0, + "completions/mean_terminated_length": 1298.0, + "completions/min_length": 1047.0, + "completions/min_terminated_length": 1047.0, + "epoch": 0.2463115778944736, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.29342981941274, + "kl": 0.00554656982421875, + "learning_rate": 9.427838433334581e-07, + "loss": -0.0244, + "num_tokens": 44382031.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.011316180229187, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05749509549575421, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060723910830963734, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572016, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1263.625, + "completions/mean_terminated_length": 1184.8333740234375, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.24656164041010253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5446590073754005, + "kl": 0.009857177734375, + "learning_rate": 9.425920098883967e-07, + "loss": -0.0769, + "num_tokens": 44425377.0, + "reward": 0.0, + "reward_std": 0.8110443353652954, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07260247500307761, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051208215913740016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1079.1875, + "completions/mean_terminated_length": 1019.0714721679688, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.24681170292573143, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.106899940773694, + "kl": 0.00327301025390625, + "learning_rate": 9.423998773012747e-07, + "loss": -0.0758, + "num_tokens": 44477068.0, + "reward": 0.0, + "reward_std": 0.9565083980560303, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15611151555055808, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07468447404517695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 1234.5, + "completions/mean_terminated_length": 969.0, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.24706176544136035, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.394474868348308, + "kl": 0.00447845458984375, + "learning_rate": 9.422074457184908e-07, + "loss": -0.0057, + "num_tokens": 44531052.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.358110249042511, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015203521413305476, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06316041642704762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1207.0, + "completions/max_terminated_length": 1207.0, + "completions/mean_length": 925.1875, + "completions/mean_terminated_length": 925.1875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.24731182795698925, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7317982475362594, + "kl": 0.0055446624755859375, + "learning_rate": 9.420147152866713e-07, + "loss": -0.0145, + "num_tokens": 44571415.0, + "reward": 0.0, + "reward_std": 0.48777157068252563, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07693219337574718, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15147279970624247, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1228.0, + "completions/mean_length": 1023.8125, + "completions/mean_terminated_length": 992.0667114257812, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.24756189047261815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.221577034633903, + "kl": 0.0101318359375, + "learning_rate": 9.418216861526704e-07, + "loss": -0.0514, + "num_tokens": 44610564.0, + "reward": 0.0, + "reward_std": 0.8762257099151611, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1026533254208791, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10601989861010006, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1144.25, + "completions/mean_terminated_length": 1120.533447265625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.24781195298824707, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.417627105129949, + "kl": 0.007080078125, + "learning_rate": 9.416283584635699e-07, + "loss": -0.0218, + "num_tokens": 44646144.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9723121523857117, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04076788663285712, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09480264357123637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1216.625, + "completions/mean_terminated_length": 1087.8182373046875, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.24806201550387597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5377615796597457, + "kl": 0.0096893310546875, + "learning_rate": 9.41434732366679e-07, + "loss": 0.0341, + "num_tokens": 44702618.0, + "reward": 3.3527612686157227e-08, + "reward_std": 1.0101433992385864, + "rewards/wordcountpos_reward_GEOBench/mean": 3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24946460947751337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4439759431690878, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1239.375, + "completions/mean_terminated_length": 1239.375, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.2483120780195049, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0866352363698435, + "kl": 0.004608154296875, + "learning_rate": 9.412408080095344e-07, + "loss": 0.0007, + "num_tokens": 44749024.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0444862842559814, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009399419713051445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.013485459906705381, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.042163702135578414, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1117.125, + "completions/mean_terminated_length": 1062.4285888671875, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.24856214053513379, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.155910576410107, + "kl": 0.0076751708984375, + "learning_rate": 9.410465855399003e-07, + "loss": 0.0061, + "num_tokens": 44791482.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7410385608673096, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0719370686464798, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1555842625652925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1274.9375, + "completions/mean_terminated_length": 1223.0, + "completions/min_length": 1036.0, + "completions/min_terminated_length": 1036.0, + "epoch": 0.24881220305076268, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.970027045552459, + "kl": 0.0091400146484375, + "learning_rate": 9.408520651057672e-07, + "loss": 0.0189, + "num_tokens": 44834497.0, + "reward": 0.0, + "reward_std": 0.9619894623756409, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042609291951189426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0957304902726316, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1406.0, + "completions/mean_terminated_length": 1285.1429443359375, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.2490622655663916, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3641303685849375, + "kl": 0.0048065185546875, + "learning_rate": 9.406572468553536e-07, + "loss": 0.0046, + "num_tokens": 44888361.0, + "reward": 0.0, + "reward_std": 0.9914342164993286, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0610626149617841, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18412710684472505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055634, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1104.375, + "completions/mean_terminated_length": 1104.375, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.2493123280820205, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.135138566922446, + "kl": 0.0062713623046875, + "learning_rate": 9.404621309371046e-07, + "loss": 0.0149, + "num_tokens": 44920927.0, + "reward": 0.0, + "reward_std": 0.9298480749130249, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09282468907347065, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14424456679786582, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1293.3125, + "completions/mean_terminated_length": 1245.615478515625, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.24956239059764943, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3399523149162427, + "kl": 0.010009765625, + "learning_rate": 9.402667174996917e-07, + "loss": 0.0211, + "num_tokens": 44969404.0, + "reward": 0.0, + "reward_std": 0.9176042079925537, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06787022622408106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0950082619546522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1108.0625, + "completions/mean_terminated_length": 1017.6154174804688, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.24981245311327832, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3369367584756677, + "kl": 0.0112152099609375, + "learning_rate": 9.400710066920138e-07, + "loss": -0.0276, + "num_tokens": 45026077.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8962461352348328, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028769141259589563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08841489747043413, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 999 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1134.0, + "completions/max_terminated_length": 1134.0, + "completions/mean_length": 992.1875, + "completions/mean_terminated_length": 992.1875, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.25006251562890724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.255631565032446, + "kl": 0.00765228271484375, + "learning_rate": 9.398749986631957e-07, + "loss": -0.0133, + "num_tokens": 45061808.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9880810976028442, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06033951020112374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06835760583873772, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1000 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 1256.3125, + "completions/mean_terminated_length": 1110.0999755859375, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.2503125781445361, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.940749952425076, + "kl": 0.0088043212890625, + "learning_rate": 9.396786935625893e-07, + "loss": -0.0213, + "num_tokens": 45116309.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9695332050323486, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0887242548520512, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07581191802705696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1317.1875, + "completions/mean_terminated_length": 1207.5, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.25056264066016504, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.622410575363394, + "kl": 0.005786895751953125, + "learning_rate": 9.394820915397723e-07, + "loss": -0.0101, + "num_tokens": 45178224.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7667928338050842, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06256506556862433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06636159450746472, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1228.375, + "completions/mean_terminated_length": 1210.2667236328125, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.25081270317579396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2043915215522407, + "kl": 0.0100250244140625, + "learning_rate": 9.392851927445492e-07, + "loss": -0.0128, + "num_tokens": 45222214.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0567686557769775, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07553839026719891, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11361085918572093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1033.0, + "completions/max_terminated_length": 1033.0, + "completions/mean_length": 747.0625, + "completions/mean_terminated_length": 747.0625, + "completions/min_length": 477.0, + "completions/min_terminated_length": 477.0, + "epoch": 0.25106276569142283, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2080070019139715, + "kl": 0.00556182861328125, + "learning_rate": 9.3908799732695e-07, + "loss": 0.0287, + "num_tokens": 45252663.0, + "reward": 0.0, + "reward_std": 0.8311686515808105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017680782637340655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13596104136282355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1004 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1398.125, + "completions/mean_terminated_length": 1296.25, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.25131282820705175, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.901769194238577, + "kl": 0.0085296630859375, + "learning_rate": 9.388905054372314e-07, + "loss": -0.0265, + "num_tokens": 45311897.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9616295099258423, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23140822109983797, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2664165552492221, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1005 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1305.875, + "completions/mean_terminated_length": 1278.1429443359375, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.2515628907226807, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8931215269008677, + "kl": 0.004756927490234375, + "learning_rate": 9.386927172258753e-07, + "loss": 0.0078, + "num_tokens": 45359895.0, + "reward": 0.0, + "reward_std": 0.8866841793060303, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04670640583415772, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17029916951134458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1006 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1257.5625, + "completions/mean_terminated_length": 1201.615478515625, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.2518129532383096, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3376590706857634, + "kl": 0.00872802734375, + "learning_rate": 9.384946328435899e-07, + "loss": 0.01, + "num_tokens": 45407320.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0534512996673584, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14027151951417938, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1478699351154501, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1293.25, + "completions/mean_terminated_length": 1279.4666748046875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.25206301575393847, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6740983596755292, + "kl": 0.0076904296875, + "learning_rate": 9.382962524413091e-07, + "loss": 0.0132, + "num_tokens": 45460332.0, + "reward": 0.0, + "reward_std": 0.6587280631065369, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12043653050997707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08865493342436871, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13817594795257457, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1008 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1130.875, + "completions/mean_terminated_length": 1106.2667236328125, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.2523130782695674, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.929260631161478, + "kl": 0.00934600830078125, + "learning_rate": 9.380975761701919e-07, + "loss": -0.0117, + "num_tokens": 45499706.0, + "reward": 0.0, + "reward_std": 0.6374462842941284, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.39471109225634116, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4448858295929196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1009 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1396.8125, + "completions/mean_terminated_length": 1316.5555419921875, + "completions/min_length": 1157.0, + "completions/min_terminated_length": 1157.0, + "epoch": 0.2525631407851963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.077868715423754, + "kl": 0.00907135009765625, + "learning_rate": 9.378986041816231e-07, + "loss": -0.0128, + "num_tokens": 45556167.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0170480012893677, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.242414335187467, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3046480547456267, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982526, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1010 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 828.875, + "completions/mean_terminated_length": 828.875, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.2528132033008252, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1136604949784417, + "kl": 0.00604248046875, + "learning_rate": 9.376993366272127e-07, + "loss": -0.028, + "num_tokens": 45587637.0, + "reward": 0.0, + "reward_std": 0.9219021797180176, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20848397710747338, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.36958516268557184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1263.125, + "completions/mean_terminated_length": 1208.4615478515625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.2530632658164541, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1206394676655096, + "kl": 0.00937652587890625, + "learning_rate": 9.37499773658796e-07, + "loss": -0.0241, + "num_tokens": 45632031.0, + "reward": 0.0, + "reward_std": 0.7014469504356384, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017844864294631328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08603222963593088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18089284734953515, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1012 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1236.1875, + "completions/mean_terminated_length": 1198.5, + "completions/min_length": 1074.0, + "completions/min_terminated_length": 1074.0, + "epoch": 0.25331332833208303, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.411086338461042, + "kl": 0.0055942535400390625, + "learning_rate": 9.372999154284335e-07, + "loss": -0.0137, + "num_tokens": 45675866.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9546520709991455, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05780554762350368, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027296560760500032, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1202.1875, + "completions/mean_terminated_length": 1159.6429443359375, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.2535633908477119, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3781370419199406, + "kl": 0.01031494140625, + "learning_rate": 9.370997620884103e-07, + "loss": 0.0234, + "num_tokens": 45718677.0, + "reward": 0.0, + "reward_std": 0.6619548201560974, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013663626699611985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04278853135611688, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1014 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1103.0625, + "completions/mean_terminated_length": 1076.60009765625, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.2538134533633408, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2198913795475326, + "kl": 0.0099029541015625, + "learning_rate": 9.368993137912368e-07, + "loss": -0.0513, + "num_tokens": 45761766.0, + "reward": 0.0, + "reward_std": 0.5924420356750488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12186705278272113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16538878662770767, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1111.1875, + "completions/mean_terminated_length": 1111.1875, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.25406351587896975, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.443941887813366, + "kl": 0.008056640625, + "learning_rate": 9.366985706896476e-07, + "loss": -0.0221, + "num_tokens": 45812881.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0334360599517822, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02896023842356065, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04804994982131485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1137.75, + "completions/mean_terminated_length": 1113.60009765625, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.25431357839459867, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7651365840372173, + "kl": 0.0104827880859375, + "learning_rate": 9.364975329366027e-07, + "loss": -0.0697, + "num_tokens": 45866333.0, + "reward": 0.0, + "reward_std": 1.0447680950164795, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05664179013797277, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08087638781543362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1017 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1200.3125, + "completions/mean_terminated_length": 1180.3333740234375, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.25456364091022754, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.277158674823988, + "kl": 0.0113525390625, + "learning_rate": 9.362962006852858e-07, + "loss": 0.0457, + "num_tokens": 45920186.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.01776123046875, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01459734928463544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059876134376055164, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1018 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1334.25, + "completions/mean_terminated_length": 1258.9091796875, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.25481370342585646, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.83795153957889, + "kl": 0.007415771484375, + "learning_rate": 9.360945740891057e-07, + "loss": -0.0006, + "num_tokens": 45966518.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.03525972366333, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08565442155767757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08828339507977452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1019 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1085.0, + "completions/mean_terminated_length": 1057.3333740234375, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.2550637659414854, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7887373629120795, + "kl": 0.0035858154296875, + "learning_rate": 9.358926533016948e-07, + "loss": -0.0374, + "num_tokens": 46009102.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9906073808670044, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025056522521267072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07115679081080285, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1020 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1168.8125, + "completions/mean_terminated_length": 1168.8125, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.25531382845711426, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.496400289798984, + "kl": 0.00658416748046875, + "learning_rate": 9.356904384769104e-07, + "loss": -0.0081, + "num_tokens": 46048075.0, + "reward": 0.0, + "reward_std": 0.8535093069076538, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012111277995052686, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03204984136363698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1021 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1086.0, + "completions/mean_terminated_length": 1058.4000244140625, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.2555638909727432, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2415827091456766, + "kl": 0.00849151611328125, + "learning_rate": 9.354879297688333e-07, + "loss": -0.0237, + "num_tokens": 46081667.0, + "reward": 0.0, + "reward_std": 0.7783819437026978, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041881839813639396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03653251336395646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1022 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1308.0625, + "completions/mean_terminated_length": 1192.9000244140625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.2558139534883721, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1749895254431197, + "kl": 0.0087432861328125, + "learning_rate": 9.352851273317683e-07, + "loss": -0.051, + "num_tokens": 46124916.0, + "reward": 0.0, + "reward_std": 0.8443607091903687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00012482134409055716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05754681967364159, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 920.3125, + "completions/mean_terminated_length": 920.3125, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.256064016004001, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.658883662442756, + "kl": 0.009979248046875, + "learning_rate": 9.350820313202444e-07, + "loss": -0.0568, + "num_tokens": 46162129.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.061575174331665, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06599195473143661, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04996429518494551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1108.9375, + "completions/mean_terminated_length": 1108.9375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.2563140785196299, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.092270071714127, + "kl": 0.00435638427734375, + "learning_rate": 9.348786418890138e-07, + "loss": 0.0062, + "num_tokens": 46200600.0, + "reward": 0.0, + "reward_std": 0.9726424813270569, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006670749166733823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3686150525728837, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1175.625, + "completions/mean_terminated_length": 1175.625, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.2565641410352588, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6192626307106033, + "kl": 0.01104736328125, + "learning_rate": 9.346749591930525e-07, + "loss": -0.011, + "num_tokens": 46252978.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8820825815200806, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03362914846928218, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1162068401216126, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1026 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1207.9375, + "completions/mean_terminated_length": 1166.21435546875, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.25681420355088774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6023497502667086, + "kl": 0.00959014892578125, + "learning_rate": 9.344709833875601e-07, + "loss": 0.0486, + "num_tokens": 46305081.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7562448978424072, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024329980801536016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042930129247333575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1027 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1322.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1116.5, + "completions/mean_terminated_length": 1116.5, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.2570642660665166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8346144726845286, + "kl": 0.007833480834960938, + "learning_rate": 9.342667146279592e-07, + "loss": -0.0036, + "num_tokens": 46348913.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.030789852142334, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02318498628113391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10579914574689728, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1193.625, + "completions/mean_terminated_length": 1149.857177734375, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.25731432858214554, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.169297506259314, + "kl": 0.0069427490234375, + "learning_rate": 9.340621530698957e-07, + "loss": 0.0297, + "num_tokens": 46397675.0, + "reward": 0.0, + "reward_std": 0.44536200165748596, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11510894948646039, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1556337977944831, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1029 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1291.25, + "completions/mean_terminated_length": 1166.0, + "completions/min_length": 558.0, + "completions/min_terminated_length": 558.0, + "epoch": 0.25756439109777446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8551969069683008, + "kl": 0.0094757080078125, + "learning_rate": 9.33857298869239e-07, + "loss": -0.1329, + "num_tokens": 46457999.0, + "reward": 0.0, + "reward_std": 0.7581911087036133, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.055300903436520464, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18782760780894436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1030 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1138.375, + "completions/mean_terminated_length": 921.4000244140625, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.2578144536134033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.494477357296656, + "kl": 0.0087890625, + "learning_rate": 9.336521521820811e-07, + "loss": 0.0537, + "num_tokens": 46502589.0, + "reward": 0.0, + "reward_std": 0.7612326145172119, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13746903956626141, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12918232329982673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0479196858952174, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1031 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1439.5, + "completions/mean_terminated_length": 1419.3333740234375, + "completions/min_length": 1229.0, + "completions/min_terminated_length": 1229.0, + "epoch": 0.25806451612903225, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3759091043200513, + "kl": 0.005615234375, + "learning_rate": 9.334467131647369e-07, + "loss": -0.0016, + "num_tokens": 46548749.0, + "reward": 0.0, + "reward_std": 0.9040517807006836, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009171651778796447, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04117733318502614, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1147.0, + "completions/mean_length": 1061.375, + "completions/mean_terminated_length": 998.71435546875, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.2583145786446612, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.11248981727977, + "kl": 0.00543212890625, + "learning_rate": 9.33240981973744e-07, + "loss": 0.0022, + "num_tokens": 46594491.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0630748271942139, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004402810380089861, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027682795237031748, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1033 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1292.875, + "completions/mean_terminated_length": 1085.75, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.2585646411602901, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.510365768230172, + "kl": 0.0068817138671875, + "learning_rate": 9.330349587658629e-07, + "loss": -0.028, + "num_tokens": 46651745.0, + "reward": 0.0, + "reward_std": 0.5631043314933777, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08112463843219425, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11418528628855369, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1034 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1153.5, + "completions/mean_terminated_length": 1073.5384521484375, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.25881470367591897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2646781805655265, + "kl": 0.0101470947265625, + "learning_rate": 9.328286436980763e-07, + "loss": -0.0788, + "num_tokens": 46702481.0, + "reward": 0.0, + "reward_std": 0.497397243976593, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06825237788744054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07217032165033375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1189.1875, + "completions/mean_terminated_length": 1144.7857666015625, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.2590647661915479, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2024364080095067, + "kl": 0.00927734375, + "learning_rate": 9.326220369275898e-07, + "loss": -0.0426, + "num_tokens": 46751644.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7891186475753784, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.29239183306034955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22760788319066663, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1036 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1092.0, + "completions/mean_length": 1253.25, + "completions/mean_terminated_length": 1006.5, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.2593148287071768, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.434668489977372, + "kl": 0.0082855224609375, + "learning_rate": 9.324151386118303e-07, + "loss": -0.0015, + "num_tokens": 46804880.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0074104070663452, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06086805193254289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10132525638820827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1037 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1449.1875, + "completions/mean_terminated_length": 1337.4000244140625, + "completions/min_length": 1231.0, + "completions/min_terminated_length": 1231.0, + "epoch": 0.2595648912228057, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4571558954685826, + "kl": 0.00652313232421875, + "learning_rate": 9.322079489084479e-07, + "loss": -0.0032, + "num_tokens": 46864291.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.050126552581787, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07668552238737757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2481211924424121, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1038 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1214.4375, + "completions/mean_terminated_length": 1148.5384521484375, + "completions/min_length": 1030.0, + "completions/min_terminated_length": 1030.0, + "epoch": 0.2598149537384346, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4681863242915525, + "kl": 0.005046844482421875, + "learning_rate": 9.320004679753141e-07, + "loss": -0.0125, + "num_tokens": 46906170.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9742652773857117, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03259082283153944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04680424345463638, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 947.625, + "completions/mean_terminated_length": 910.800048828125, + "completions/min_length": 445.0, + "completions/min_terminated_length": 445.0, + "epoch": 0.26006501625406353, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6361248645167494, + "kl": 0.0114898681640625, + "learning_rate": 9.317926959705223e-07, + "loss": -0.0436, + "num_tokens": 46945580.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8403726816177368, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026245166310139866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03402689043614111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1040 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1255.25, + "completions/mean_terminated_length": 1108.4000244140625, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.2603150787696924, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.057634303948361, + "kl": 0.01025390625, + "learning_rate": 9.315846330523882e-07, + "loss": -0.0092, + "num_tokens": 47002040.0, + "reward": 0.0, + "reward_std": 0.8483466506004333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014578174350208032, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08161556773516689, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1041 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1236.25, + "completions/mean_terminated_length": 1236.25, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.2605651412853213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6008214727876906, + "kl": 0.0111236572265625, + "learning_rate": 9.313762793794487e-07, + "loss": -0.0077, + "num_tokens": 47041260.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.038791537284851, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01658610014134864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04335123004153782, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1042 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1188.25, + "completions/mean_terminated_length": 1167.4666748046875, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.26081520380095025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.49243402395742, + "kl": 0.011444091796875, + "learning_rate": 9.311676351104622e-07, + "loss": -0.0034, + "num_tokens": 47087296.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.964765191078186, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08386835780116099, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04876199438013727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1043 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1328.5, + "completions/mean_terminated_length": 1271.3333740234375, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.26106526631657917, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.675813519909271, + "kl": 0.007537841796875, + "learning_rate": 9.309587004044089e-07, + "loss": 0.0157, + "num_tokens": 47136896.0, + "reward": 0.0, + "reward_std": 0.8427673578262329, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06976039455727671, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11959671196588331, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1044 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 917.875, + "completions/mean_terminated_length": 917.875, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.26131532883220804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.997947545922719, + "kl": 0.00738525390625, + "learning_rate": 9.3074947542049e-07, + "loss": -0.0375, + "num_tokens": 47177822.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8235709071159363, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028663426860419616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.101100718076789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1045 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1448.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1165.75, + "completions/mean_terminated_length": 1165.75, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.26156539134783696, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.943216095336275, + "kl": 0.00868988037109375, + "learning_rate": 9.30539960318128e-07, + "loss": 0.0082, + "num_tokens": 47225594.0, + "reward": 0.0, + "reward_std": 0.9972295761108398, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031087838923126915, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07815528234604355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1046 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1164.625, + "completions/mean_terminated_length": 1164.625, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.2618154538634659, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.13704708466578, + "kl": 0.00884246826171875, + "learning_rate": 9.303301552569664e-07, + "loss": -0.0579, + "num_tokens": 47277132.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9096618890762329, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024991505955177625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08290225972005634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1047 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1174.6875, + "completions/mean_terminated_length": 1174.6875, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.26206551637909475, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8896311382726605, + "kl": 0.00927734375, + "learning_rate": 9.301200603968699e-07, + "loss": 0.0087, + "num_tokens": 47322423.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8295023441314697, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04851486822976438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053031574566075594, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1048 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 1163.6875, + "completions/mean_terminated_length": 1141.2667236328125, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.2623155788947237, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.233549488660122, + "kl": 0.006927490234375, + "learning_rate": 9.299096758979237e-07, + "loss": -0.0157, + "num_tokens": 47360690.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.933129072189331, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03221848444380976, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09645598650578965, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1049 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1075.625, + "completions/mean_terminated_length": 1075.625, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.2625656414103526, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.500632004840124, + "kl": 0.0092010498046875, + "learning_rate": 9.296990019204335e-07, + "loss": -0.0219, + "num_tokens": 47394300.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8164844512939453, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08756270196059729, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12450709667562358, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.060705726131767744, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1050 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1248.6875, + "completions/mean_terminated_length": 1190.6923828125, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.26281570392598147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2203423361768575, + "kl": 0.0100860595703125, + "learning_rate": 9.294880386249262e-07, + "loss": 0.0244, + "num_tokens": 47444423.0, + "reward": 0.0, + "reward_std": 0.9546298980712891, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06654790221923386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07710009504187292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1106.125, + "completions/mean_terminated_length": 1079.86669921875, + "completions/min_length": 597.0, + "completions/min_terminated_length": 597.0, + "epoch": 0.2630657664416104, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.24420247564594, + "kl": 0.00881195068359375, + "learning_rate": 9.292767861721488e-07, + "loss": 0.0607, + "num_tokens": 47488729.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6487468481063843, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10045691495574781, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12534898300403646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1167.1875, + "completions/mean_terminated_length": 1056.25, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.2633158289572393, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.240834260699552, + "kl": 0.009918212890625, + "learning_rate": 9.290652447230685e-07, + "loss": 0.027, + "num_tokens": 47538316.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9729578495025635, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004287563621074427, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08857839148590874, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1205.8125, + "completions/mean_terminated_length": 1205.8125, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.26356589147286824, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3338354920000484, + "kl": 0.010284423828125, + "learning_rate": 9.288534144388728e-07, + "loss": -0.0068, + "num_tokens": 47576809.0, + "reward": 0.0, + "reward_std": 1.0242129564285278, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04831403750160806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03763419575907004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1306.75, + "completions/mean_terminated_length": 1293.86669921875, + "completions/min_length": 1146.0, + "completions/min_terminated_length": 1146.0, + "epoch": 0.2638159539884971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3237679985662143, + "kl": 0.01068115234375, + "learning_rate": 9.286412954809696e-07, + "loss": 0.0107, + "num_tokens": 47622221.0, + "reward": 0.0, + "reward_std": 0.992053747177124, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16926068626325327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16876192682565452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.24247947603105774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1055 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1144.0625, + "completions/mean_terminated_length": 1120.3333740234375, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.26406601650412603, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4155717914446733, + "kl": 0.0104827880859375, + "learning_rate": 9.284288880109863e-07, + "loss": -0.1036, + "num_tokens": 47680102.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9768359661102295, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -7.181666371634636e-05, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14449289384308342, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1274.25, + "completions/mean_terminated_length": 1171.6363525390625, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.26431607901975496, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.78486475992051, + "kl": 0.008056640625, + "learning_rate": 9.282161921907704e-07, + "loss": -0.032, + "num_tokens": 47737442.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8572721481323242, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07585059215671901, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1647086455183051, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1057 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1101.875, + "completions/mean_terminated_length": 1101.875, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.2645661415353838, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7384572038674664, + "kl": 0.005748748779296875, + "learning_rate": 9.280032081823891e-07, + "loss": -0.005, + "num_tokens": 47784256.0, + "reward": 0.0, + "reward_std": 0.7396981716156006, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009687949217123345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10421321413707033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 880.875, + "completions/mean_terminated_length": 792.4285888671875, + "completions/min_length": 527.0, + "completions/min_terminated_length": 527.0, + "epoch": 0.26481620405101275, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2259676800133756, + "kl": 0.00658416748046875, + "learning_rate": 9.27789936148129e-07, + "loss": -0.0241, + "num_tokens": 47821990.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.049208641052246, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022292934564678554, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06756209576744598, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1325.6875, + "completions/mean_terminated_length": 1300.7857666015625, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.2650662665666417, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.923267079422633, + "kl": 0.0126953125, + "learning_rate": 9.275763762504961e-07, + "loss": -0.0003, + "num_tokens": 47874577.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9351505041122437, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08379110915940083, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06930180961263603, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05962847939999443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1060 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1331.1875, + "completions/mean_terminated_length": 1254.45458984375, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.26531632908227054, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.582862218333396, + "kl": 0.00769805908203125, + "learning_rate": 9.273625286522166e-07, + "loss": -0.0282, + "num_tokens": 47931428.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0124320983886719, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022337312763407877, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08192415212647837, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1061 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1164.4375, + "completions/mean_terminated_length": 963.1000366210938, + "completions/min_length": 406.0, + "completions/min_terminated_length": 406.0, + "epoch": 0.26556639159789946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6119590664116323, + "kl": 0.0089569091796875, + "learning_rate": 9.271483935162346e-07, + "loss": -0.0902, + "num_tokens": 47989947.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0675547122955322, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10599747815621643, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07587502741259948, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1062 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1345.5, + "completions/mean_terminated_length": 1323.4285888671875, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.2658164541135284, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.460732080749778, + "kl": 0.00689697265625, + "learning_rate": 9.269339710057144e-07, + "loss": 0.0268, + "num_tokens": 48037627.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5567209720611572, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0603701482606394, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10867528223721554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1063 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1269.0625, + "completions/mean_terminated_length": 1038.125, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.2660665166291573, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0932237563222564, + "kl": 0.008331298828125, + "learning_rate": 9.267192612840385e-07, + "loss": -0.0063, + "num_tokens": 48091692.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.019169807434082, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15963746513981894, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054552106162666036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1064 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1070.1875, + "completions/mean_terminated_length": 1041.533447265625, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.2663165791447862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3165337734185525, + "kl": 0.00455474853515625, + "learning_rate": 9.265042645148088e-07, + "loss": -0.0146, + "num_tokens": 48139495.0, + "reward": 0.0, + "reward_std": 0.2952994704246521, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09065293967463248, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08661142188067948, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1065 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1247.125, + "completions/mean_terminated_length": 1247.125, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.2665666416604151, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4076936167642313, + "kl": 0.010528564453125, + "learning_rate": 9.262889808618454e-07, + "loss": 0.0036, + "num_tokens": 48190257.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0527265071868896, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16564170345826626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1584954284835632, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1066 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1106.875, + "completions/mean_terminated_length": 1106.875, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.26681670417604403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4686391777792114, + "kl": 0.009918212890625, + "learning_rate": 9.260734104891877e-07, + "loss": -0.0256, + "num_tokens": 48238503.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0469577312469482, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03408425074859937, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05481203815152087, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.070841502796867, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1067 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 954.5, + "completions/mean_terminated_length": 954.5, + "completions/min_length": 599.0, + "completions/min_terminated_length": 599.0, + "epoch": 0.2670667666916729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.403295526100931, + "kl": 0.00751495361328125, + "learning_rate": 9.258575535610928e-07, + "loss": -0.0022, + "num_tokens": 48269735.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8550411462783813, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006884864432240787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0937938161883861, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1068 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 973.375, + "completions/mean_terminated_length": 973.375, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.2673168292073018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5502439048811913, + "kl": 0.004329681396484375, + "learning_rate": 9.256414102420368e-07, + "loss": -0.0174, + "num_tokens": 48317085.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9364168643951416, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026034555525429617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11306551965907595, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1069 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1214.6875, + "completions/mean_terminated_length": 1173.9285888671875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.26756689172293074, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1427573235701427, + "kl": 0.01003265380859375, + "learning_rate": 9.254249806967134e-07, + "loss": -0.0175, + "num_tokens": 48365984.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8360569477081299, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.001242654647899898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12895532301186746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0910840068085298, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1070 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 903.0, + "completions/mean_length": 1132.4375, + "completions/mean_terminated_length": 764.875, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.2678169542385596, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0988329425758416, + "kl": 0.0095977783203125, + "learning_rate": 9.252082650900351e-07, + "loss": 0.014, + "num_tokens": 48407423.0, + "reward": 0.0, + "reward_std": 0.807144820690155, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011769122969835616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054907500068927154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1151.5625, + "completions/mean_terminated_length": 1101.7857666015625, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.26806701675418854, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.180964044293882, + "kl": 0.0091552734375, + "learning_rate": 9.249912635871317e-07, + "loss": 0.0257, + "num_tokens": 48454624.0, + "reward": 0.0, + "reward_std": 0.8107514977455139, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02288312441028216, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09756451149846891, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1072 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1280.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 963.0625, + "completions/mean_terminated_length": 963.0625, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.26831707926981746, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6379851934806924, + "kl": 0.0102081298828125, + "learning_rate": 9.247739763533512e-07, + "loss": -0.0529, + "num_tokens": 48495353.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5706742405891418, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07325875716531352, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10023554641619199, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1073 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1003.8125, + "completions/mean_terminated_length": 1003.8125, + "completions/min_length": 670.0, + "completions/min_terminated_length": 670.0, + "epoch": 0.2685671417854464, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8578209641209504, + "kl": 0.00774383544921875, + "learning_rate": 9.245564035542593e-07, + "loss": 0.028, + "num_tokens": 48550902.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9526702165603638, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08811912815310897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06686764060241068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1074 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1080.125, + "completions/mean_terminated_length": 983.2308349609375, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.26881720430107525, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1736195516078864, + "kl": 0.00856781005859375, + "learning_rate": 9.243385453556391e-07, + "loss": 0.008, + "num_tokens": 48595064.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8951947093009949, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007930409665983715, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.00954031840015872, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176771, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1257.125, + "completions/mean_terminated_length": 1068.2222900390625, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.2690672668167042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.812473105233374, + "kl": 0.00579071044921875, + "learning_rate": 9.241204019234915e-07, + "loss": 0.031, + "num_tokens": 48643954.0, + "reward": 0.0, + "reward_std": 0.9468863010406494, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2216721158643779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17252322214961774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1076 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1417.3125, + "completions/mean_terminated_length": 1367.7000732421875, + "completions/min_length": 1163.0, + "completions/min_terminated_length": 1163.0, + "epoch": 0.2693173293323331, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1976304215722164, + "kl": 0.0099334716796875, + "learning_rate": 9.239019734240345e-07, + "loss": -0.0186, + "num_tokens": 48693295.0, + "reward": 0.0, + "reward_std": 0.8770102858543396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007789398718094782, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044142838748887046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1077 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1286.375, + "completions/mean_terminated_length": 1072.75, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.26956739184796197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5639432105538393, + "kl": 0.00618743896484375, + "learning_rate": 9.236832600237031e-07, + "loss": 0.0195, + "num_tokens": 48746845.0, + "reward": 0.0, + "reward_std": 1.0128943920135498, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07893041289267134, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048885154618570334, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1078 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1156.625, + "completions/mean_terminated_length": 1133.7333984375, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.2698174543635909, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.56023914948797, + "kl": 0.0118408203125, + "learning_rate": 9.234642618891501e-07, + "loss": -0.0219, + "num_tokens": 48785711.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0124239921569824, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06224733413655627, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07206049221114484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568498, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1250.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 969.25, + "completions/mean_terminated_length": 969.25, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.2700675168792198, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0783239248714747, + "kl": 0.006092071533203125, + "learning_rate": 9.232449791872444e-07, + "loss": 0.0521, + "num_tokens": 48824283.0, + "reward": 0.0, + "reward_std": 0.6873195767402649, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10280020062058934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14159963874449216, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1080 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1257.9375, + "completions/mean_terminated_length": 1241.800048828125, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.27031757939484874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1855885764864746, + "kl": 0.01116943359375, + "learning_rate": 9.230254120850722e-07, + "loss": 0.0395, + "num_tokens": 48872786.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0573084354400635, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10173674202290545, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20964779512625736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1064.375, + "completions/mean_terminated_length": 1035.3333740234375, + "completions/min_length": 600.0, + "completions/min_terminated_length": 600.0, + "epoch": 0.2705676419104776, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0565216834050717, + "kl": 0.00417327880859375, + "learning_rate": 9.228055607499365e-07, + "loss": 0.0271, + "num_tokens": 48908520.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7074522376060486, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05408618574696474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16041554546136064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437974, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1082 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1032.875, + "completions/mean_terminated_length": 1032.875, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.27081770442610653, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7478261289380215, + "kl": 0.007503509521484375, + "learning_rate": 9.225854253493567e-07, + "loss": -0.0078, + "num_tokens": 48942766.0, + "reward": 0.0, + "reward_std": 0.39146506786346436, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08428431115401504, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18086875996178797, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1083 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1478.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1013.75, + "completions/mean_terminated_length": 1013.75, + "completions/min_length": 565.0, + "completions/min_terminated_length": 565.0, + "epoch": 0.27106776694173546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4930880371030346, + "kl": 0.004627227783203125, + "learning_rate": 9.223650060510684e-07, + "loss": -0.0148, + "num_tokens": 48977018.0, + "reward": 0.0, + "reward_std": 0.48078322410583496, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010003649759044934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.021587103004865292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 986.375, + "completions/mean_terminated_length": 986.375, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.2713178294573643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5389675878076563, + "kl": 0.006988525390625, + "learning_rate": 9.221443030230242e-07, + "loss": -0.0329, + "num_tokens": 49010376.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0610061883926392, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06667843371608165, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054415683737110566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1085 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1212.375, + "completions/mean_terminated_length": 1146.0, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.27156789197299325, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2016646178693935, + "kl": 0.0096893310546875, + "learning_rate": 9.219233164333919e-07, + "loss": -0.0362, + "num_tokens": 49048742.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9722825288772583, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10315835727358705, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15572000480412518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1086.6875, + "completions/mean_terminated_length": 1059.1334228515625, + "completions/min_length": 665.0, + "completions/min_terminated_length": 665.0, + "epoch": 0.27181795448862217, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5740216962031934, + "kl": 0.00809478759765625, + "learning_rate": 9.217020464505565e-07, + "loss": 0.0199, + "num_tokens": 49088985.0, + "reward": 0.0, + "reward_std": 0.5474467277526855, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04705760712982224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06930843008300691, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1087 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1257.0625, + "completions/mean_terminated_length": 1222.357177734375, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.27206801700425104, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.440077447709604, + "kl": 0.0111846923828125, + "learning_rate": 9.214804932431181e-07, + "loss": 0.0454, + "num_tokens": 49143778.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9754810333251953, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05820261578006208, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05648204631065286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1088 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1237.6875, + "completions/mean_terminated_length": 1220.2000732421875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.27231807951987996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1230662620621494, + "kl": 0.010345458984375, + "learning_rate": 9.212586569798928e-07, + "loss": -0.0115, + "num_tokens": 49192005.0, + "reward": 0.0, + "reward_std": 0.9353021383285522, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011379117239640708, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052023301911729734, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.03849001794597506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1089 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1202.4375, + "completions/mean_terminated_length": 1159.9285888671875, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.2725681420355089, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6838639190883486, + "kl": 0.0070037841796875, + "learning_rate": 9.210365378299126e-07, + "loss": -0.0425, + "num_tokens": 49236796.0, + "reward": 0.0, + "reward_std": 0.8666723370552063, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06297531276746161, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0955697080744837, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1090 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 998.375, + "completions/mean_terminated_length": 998.375, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.2728182045511378, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6306988219950154, + "kl": 0.0094146728515625, + "learning_rate": 9.208141359624248e-07, + "loss": -0.003, + "num_tokens": 49274666.0, + "reward": 0.0, + "reward_std": 0.6905403137207031, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08326467595143366, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10088786678892582, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1091 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1290.4375, + "completions/mean_terminated_length": 1260.5, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.2730682670667667, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.651561725386305, + "kl": 0.00791168212890625, + "learning_rate": 9.205914515468925e-07, + "loss": 0.011, + "num_tokens": 49318297.0, + "reward": 0.0, + "reward_std": 0.9372519850730896, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10570491250235155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13592591998457104, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1092 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1435.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1204.8125, + "completions/mean_terminated_length": 1204.8125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.2733183295823956, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0183959979097637, + "kl": 0.006923675537109375, + "learning_rate": 9.203684847529936e-07, + "loss": -0.0472, + "num_tokens": 49353582.0, + "reward": 0.0, + "reward_std": 0.996518611907959, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00824223540639082, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026445211131006514, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1093 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1224.0, + "completions/mean_terminated_length": 1205.60009765625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.2735683920980245, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.079565123573228, + "kl": 0.0095062255859375, + "learning_rate": 9.201452357506212e-07, + "loss": -0.0516, + "num_tokens": 49399278.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0226129293441772, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07111473133636162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06881522207798774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1332.4375, + "completions/mean_terminated_length": 1293.769287109375, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.2738184546136534, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.523877683453408, + "kl": 0.00818634033203125, + "learning_rate": 9.19921704709884e-07, + "loss": 0.0298, + "num_tokens": 49457805.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5587790012359619, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018477083551220127, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21759118562323165, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1095 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1158.8125, + "completions/mean_terminated_length": 1158.8125, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.2740685171292823, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.146862028622263, + "kl": 0.00994873046875, + "learning_rate": 9.196978918011051e-07, + "loss": -0.0204, + "num_tokens": 49503002.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.737562894821167, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0416206662346508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04670197493057892, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1291.5625, + "completions/mean_terminated_length": 1129.4444580078125, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.27431857964491124, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.093335189632621, + "kl": 0.00939178466796875, + "learning_rate": 9.194737971948223e-07, + "loss": 0.0044, + "num_tokens": 49555067.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6046839952468872, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03989555064188882, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08111237132777013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1097 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1216.8125, + "completions/mean_terminated_length": 1197.933349609375, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.2745686421605401, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1717129630139023, + "kl": 0.008056640625, + "learning_rate": 9.192494210617883e-07, + "loss": -0.0192, + "num_tokens": 49603440.0, + "reward": 0.0, + "reward_std": 0.6300812363624573, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012855795252983566, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053415778945892625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12758439472669758, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1098 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1148.625, + "completions/mean_terminated_length": 1125.2000732421875, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.27481870467616903, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2378807772029585, + "kl": 0.011260986328125, + "learning_rate": 9.190247635729706e-07, + "loss": -0.084, + "num_tokens": 49655202.0, + "reward": 0.0, + "reward_std": 1.0368832349777222, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005163113199499161, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08852750009916033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1099 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1091.5625, + "completions/mean_terminated_length": 1091.5625, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.27506876719179796, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5987443939589445, + "kl": 0.0099639892578125, + "learning_rate": 9.187998248995505e-07, + "loss": -0.0265, + "num_tokens": 49702619.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9431605339050293, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05526417530287456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11607539032300784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1267.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 1000.5, + "completions/mean_terminated_length": 1000.5, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.2753188297074269, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.319526583712982, + "kl": 0.00823974609375, + "learning_rate": 9.185746052129235e-07, + "loss": 0.006, + "num_tokens": 49732147.0, + "reward": 0.0, + "reward_std": 0.9872654676437378, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022732327387943608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060517928239261325, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1113.5, + "completions/mean_terminated_length": 1058.2857666015625, + "completions/min_length": 455.0, + "completions/min_terminated_length": 455.0, + "epoch": 0.27556889222305575, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8764515833940116, + "kl": 0.0119781494140625, + "learning_rate": 9.183491046846999e-07, + "loss": 0.025, + "num_tokens": 49772779.0, + "reward": 0.0, + "reward_std": 1.0079761743545532, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0012595164393633673, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03088989269393661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1046.3125, + "completions/mean_terminated_length": 1016.0667114257812, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.2758189547386847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.729758978028273, + "kl": 0.01153564453125, + "learning_rate": 9.181233234867035e-07, + "loss": -0.0642, + "num_tokens": 49815504.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0150370597839355, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023389980921259733, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09724975112078536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1141.0, + "completions/max_terminated_length": 1141.0, + "completions/mean_length": 948.4375, + "completions/mean_terminated_length": 948.4375, + "completions/min_length": 630.0, + "completions/min_terminated_length": 630.0, + "epoch": 0.2760690172543136, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.32265360163392, + "kl": 0.00914764404296875, + "learning_rate": 9.178972617909721e-07, + "loss": 0.0229, + "num_tokens": 49858647.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0142794847488403, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20725805160826632, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3778090831085319, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1218.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 1058.4375, + "completions/mean_terminated_length": 1058.4375, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.27631907976994247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.662411640837916, + "kl": 0.00682830810546875, + "learning_rate": 9.176709197697572e-07, + "loss": -0.0127, + "num_tokens": 49891958.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9119734764099121, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014495773443476714, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02704170927435661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195013, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1155.9375, + "completions/mean_terminated_length": 1076.5384521484375, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.2765691422855714, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7309782346065825, + "kl": 0.00380706787109375, + "learning_rate": 9.174442975955237e-07, + "loss": -0.0102, + "num_tokens": 49935429.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8802613019943237, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028138349365604692, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12375123964978693, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1109.3125, + "completions/mean_terminated_length": 1109.3125, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.2768192048012003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2685887768770114, + "kl": 0.0095977783203125, + "learning_rate": 9.172173954409504e-07, + "loss": 0.0372, + "num_tokens": 49973154.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6453003883361816, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.115331715583152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11062107836087585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1169.75, + "completions/mean_terminated_length": 1122.571533203125, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.2770692673168292, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.568526173169773, + "kl": 0.01019287109375, + "learning_rate": 9.169902134789292e-07, + "loss": -0.0467, + "num_tokens": 50025006.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9547272324562073, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03365121347082857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2619241413737418, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1102.75, + "completions/mean_terminated_length": 1046.0, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.2773193298324581, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7957654474691878, + "kl": 0.0124664306640625, + "learning_rate": 9.167627518825651e-07, + "loss": -0.0126, + "num_tokens": 50077378.0, + "reward": 0.0, + "reward_std": 0.6720855832099915, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10086444366785038, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21167381684504333, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 893.0, + "completions/mean_terminated_length": 893.0, + "completions/min_length": 493.0, + "completions/min_terminated_length": 493.0, + "epoch": 0.27756939234808703, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.07530203017959, + "kl": 0.0099334716796875, + "learning_rate": 9.165350108251762e-07, + "loss": 0.0613, + "num_tokens": 50109826.0, + "reward": 0.0, + "reward_std": 0.8825019598007202, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04097279040293781, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034713114087807424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1269.4375, + "completions/mean_terminated_length": 1236.5, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.27781945486371595, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.060781309418307, + "kl": 0.00876617431640625, + "learning_rate": 9.163069904802938e-07, + "loss": 0.0123, + "num_tokens": 50153577.0, + "reward": 0.0, + "reward_std": 0.7626385688781738, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043151033211373556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07195258806942438, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1349.9375, + "completions/mean_terminated_length": 1315.3077392578125, + "completions/min_length": 1090.0, + "completions/min_terminated_length": 1090.0, + "epoch": 0.2780695173793448, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.250961586964342, + "kl": 0.0043487548828125, + "learning_rate": 9.160786910216616e-07, + "loss": 0.0064, + "num_tokens": 50201320.0, + "reward": 0.0, + "reward_std": 0.8087390661239624, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07557610656504363, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09535837622432322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1321.0, + "completions/mean_terminated_length": 1279.6923828125, + "completions/min_length": 1030.0, + "completions/min_terminated_length": 1030.0, + "epoch": 0.27831957989497375, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8223579759542736, + "kl": 0.0076904296875, + "learning_rate": 9.158501126232364e-07, + "loss": -0.0381, + "num_tokens": 50245728.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0497851371765137, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016871092838519074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0997638476972785, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1243.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 894.8125, + "completions/mean_terminated_length": 894.8125, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.27856964241060267, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.952310295593544, + "kl": 0.0066070556640625, + "learning_rate": 9.156212554591873e-07, + "loss": -0.0207, + "num_tokens": 50281933.0, + "reward": 0.0, + "reward_std": 0.6862680912017822, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025934208312343728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053854536457284545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1180.75, + "completions/mean_terminated_length": 1159.4666748046875, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.27881970492623154, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6879748182192094, + "kl": 0.005706787109375, + "learning_rate": 9.153921197038959e-07, + "loss": -0.0363, + "num_tokens": 50318985.0, + "reward": 0.0, + "reward_std": 0.6482902765274048, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1146238359155164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11148248980086169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1127.4375, + "completions/mean_terminated_length": 1102.60009765625, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.27906976744186046, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0393003411784534, + "kl": 0.01056671142578125, + "learning_rate": 9.151627055319557e-07, + "loss": -0.0381, + "num_tokens": 50363280.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6147470474243164, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17135987032983804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2777231907731384, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/max_terminated_length": 1198.0, + "completions/mean_length": 886.5, + "completions/mean_terminated_length": 886.5, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.2793198299574894, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.117069464996866, + "kl": 0.0076904296875, + "learning_rate": 9.149330131181731e-07, + "loss": -0.0111, + "num_tokens": 50393416.0, + "reward": 0.0, + "reward_std": 0.47458523511886597, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.033444281026765874, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17746492112585013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17469550228474265, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1312.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 866.375, + "completions/mean_terminated_length": 866.375, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.27956989247311825, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0980779755267567, + "kl": 0.00814056396484375, + "learning_rate": 9.147030426375659e-07, + "loss": -0.0351, + "num_tokens": 50423814.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0569500923156738, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026076958112716778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11925375605751569, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 973.875, + "completions/mean_terminated_length": 938.800048828125, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.2798199549887472, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4761283445907227, + "kl": 0.01146697998046875, + "learning_rate": 9.144727942653639e-07, + "loss": -0.0242, + "num_tokens": 50463524.0, + "reward": 0.0, + "reward_std": 0.6541712284088135, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09238563032360442, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31763989361511485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1363.1875, + "completions/mean_terminated_length": 1281.0999755859375, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.2800700175043761, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5826171232809325, + "kl": 0.00708770751953125, + "learning_rate": 9.142422681770087e-07, + "loss": 0.0087, + "num_tokens": 50518431.0, + "reward": 1.30385160446167e-08, + "reward_std": 0.9266164302825928, + "rewards/wordcountpos_reward_GEOBench/mean": 1.30385160446167e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1194631041634724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1339620105899575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1174.9375, + "completions/mean_terminated_length": 1174.9375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.280320080020005, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5737175218199315, + "kl": 0.0234222412109375, + "learning_rate": 9.140114645481537e-07, + "loss": 0.0153, + "num_tokens": 50564782.0, + "reward": 0.0, + "reward_std": 0.5902757048606873, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07306570998019044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0859593212403608, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 956.125, + "completions/mean_terminated_length": 956.125, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.2805701425356339, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.549329155706771, + "kl": 0.00940704345703125, + "learning_rate": 9.137803835546635e-07, + "loss": 0.0088, + "num_tokens": 50607880.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8272860050201416, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03730795169083271, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05648839867242615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1044.3125, + "completions/mean_terminated_length": 1044.3125, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.2808202050512628, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.40978489688719, + "kl": 0.011199951171875, + "learning_rate": 9.135490253726141e-07, + "loss": 0.0598, + "num_tokens": 50651237.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0105233192443848, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06301717097305572, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07314467404499556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1326.4375, + "completions/mean_terminated_length": 1247.5455322265625, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.28107026756689174, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6176890205747267, + "kl": 0.00449371337890625, + "learning_rate": 9.133173901782928e-07, + "loss": 0.0187, + "num_tokens": 50697540.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8310480713844299, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09244594402395839, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08518546131528873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1210.6875, + "completions/mean_terminated_length": 1191.4000244140625, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.2813203300825206, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6258675069122974, + "kl": 0.00756072998046875, + "learning_rate": 9.130854781481978e-07, + "loss": -0.0323, + "num_tokens": 50741727.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.9632656574249268, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009261429378133693, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024422764435074996, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1005.6875, + "completions/mean_terminated_length": 1005.6875, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.28157039259814953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.371601393221872, + "kl": 0.0115509033203125, + "learning_rate": 9.128532894590385e-07, + "loss": -0.0397, + "num_tokens": 50785506.0, + "reward": 0.0, + "reward_std": 0.4674912691116333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050002495211280334, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12732516254451945, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1169.9375, + "completions/mean_terminated_length": 1169.9375, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.28182045511377846, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.087491616430544, + "kl": 0.0082244873046875, + "learning_rate": 9.126208242877348e-07, + "loss": 0.0211, + "num_tokens": 50827137.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.024529218673706, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011101359467153106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05992659527789626, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1111.125, + "completions/mean_terminated_length": 1021.3846435546875, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.2820705176294073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0127496174623434, + "kl": 0.008087158203125, + "learning_rate": 9.123880828114177e-07, + "loss": 0.0112, + "num_tokens": 50876235.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.766568124294281, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004895250966925162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057949349001757834, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 989.0625, + "completions/mean_terminated_length": 989.0625, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.28232058014503625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.11050771159306, + "kl": 0.0133819580078125, + "learning_rate": 9.12155065207428e-07, + "loss": 0.006, + "num_tokens": 50913884.0, + "reward": 0.0, + "reward_std": 0.7707127928733826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07751245820598772, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09406520921823008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1201.8125, + "completions/mean_terminated_length": 1133.0, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.28257064266066517, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.193535910848671, + "kl": 0.009735107421875, + "learning_rate": 9.11921771653318e-07, + "loss": -0.0152, + "num_tokens": 50952089.0, + "reward": 0.0, + "reward_std": 0.446670264005661, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03951087898611898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06874365470971101, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1170.9375, + "completions/mean_terminated_length": 1149.0, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.2828207051762941, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9363055680605354, + "kl": 0.00859832763671875, + "learning_rate": 9.116882023268491e-07, + "loss": -0.0214, + "num_tokens": 50996096.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9602591395378113, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03938360991045472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06416245904347541, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1356.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1096.4375, + "completions/mean_terminated_length": 1096.4375, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.28307076769192296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.567686994612439, + "kl": 0.0111083984375, + "learning_rate": 9.114543574059936e-07, + "loss": 0.0246, + "num_tokens": 51024847.0, + "reward": 0.0, + "reward_std": 0.5825423002243042, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024986111338313674, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054250914062557755, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.060705726131767744, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1020.0, + "completions/mean_terminated_length": 1020.0, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.2833208302075519, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7410196528347184, + "kl": 0.010406494140625, + "learning_rate": 9.112202370689335e-07, + "loss": -0.009, + "num_tokens": 51070871.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0184413194656372, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12164991131999067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11283970170575106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1264.8125, + "completions/mean_terminated_length": 1157.9091796875, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.2835708927231808, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9283944758838754, + "kl": 0.00762939453125, + "learning_rate": 9.109858414940609e-07, + "loss": 0.0336, + "num_tokens": 51124068.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9864324331283569, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018987636796001787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05846958158817082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1211.3125, + "completions/mean_terminated_length": 1170.071533203125, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.2838209552388097, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9391760734542485, + "kl": 0.00469970703125, + "learning_rate": 9.107511708599772e-07, + "loss": 0.0137, + "num_tokens": 51173233.0, + "reward": 0.0, + "reward_std": 0.9575276374816895, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008211202751267363, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09510258363272633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1413.25, + "completions/mean_terminated_length": 1361.2000732421875, + "completions/min_length": 1257.0, + "completions/min_terminated_length": 1257.0, + "epoch": 0.2840710177544386, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4375724174019022, + "kl": 0.0058441162109375, + "learning_rate": 9.105162253454935e-07, + "loss": 0.007, + "num_tokens": 51222333.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0485961437225342, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09535164709184649, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09253970511633738, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1139.625, + "completions/mean_terminated_length": 1088.1429443359375, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.2843210802700675, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0516373761238467, + "kl": 0.0102691650390625, + "learning_rate": 9.102810051296308e-07, + "loss": -0.0191, + "num_tokens": 51264823.0, + "reward": 0.0, + "reward_std": 0.7585535645484924, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2788838796638396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2954743834294408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452274, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1292.625, + "completions/mean_terminated_length": 1244.769287109375, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.28457114278569645, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3553103139449303, + "kl": 0.0081787109375, + "learning_rate": 9.100455103916189e-07, + "loss": 0.0175, + "num_tokens": 51314217.0, + "reward": 0.0, + "reward_std": 0.5303927063941956, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014652142229219647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046317959602942556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886442, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1254.375, + "completions/mean_terminated_length": 1063.3333740234375, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.2848212053013253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.765600402626584, + "kl": 0.00730133056640625, + "learning_rate": 9.09809741310897e-07, + "loss": -0.09, + "num_tokens": 51361159.0, + "reward": 0.0, + "reward_std": 1.0615721940994263, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02295325005508796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09020581679933697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1125.5625, + "completions/mean_terminated_length": 1000.75, + "completions/min_length": 554.0, + "completions/min_terminated_length": 554.0, + "epoch": 0.28507126781695424, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0502331666243387, + "kl": 0.010772705078125, + "learning_rate": 9.095736980671135e-07, + "loss": 0.1221, + "num_tokens": 51408800.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.762362003326416, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007147911093188203, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05385410935473566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1277.0, + "completions/max_terminated_length": 1277.0, + "completions/mean_length": 1053.4375, + "completions/mean_terminated_length": 1053.4375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.28532133033258317, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2552272503047295, + "kl": 0.006622314453125, + "learning_rate": 9.093373808401252e-07, + "loss": 0.0033, + "num_tokens": 51438207.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8035711050033569, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01692255180080635, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043346723641947624, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 978.9375, + "completions/mean_terminated_length": 978.9375, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.28557139284821204, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.108560969822375, + "kl": 0.01519775390625, + "learning_rate": 9.091007898099979e-07, + "loss": 0.0247, + "num_tokens": 51490342.0, + "reward": 0.0, + "reward_std": 0.8813499808311462, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0100306874012848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08859948658707079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18519259244445035, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1348.125, + "completions/mean_terminated_length": 1230.0, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.28582145536384096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9771591528993757, + "kl": 0.00970458984375, + "learning_rate": 9.088639251570066e-07, + "loss": -0.0248, + "num_tokens": 51543368.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9304469227790833, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06919112470718622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09438911838011244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1135.0, + "completions/max_terminated_length": 1135.0, + "completions/mean_length": 856.125, + "completions/mean_terminated_length": 856.125, + "completions/min_length": 621.0, + "completions/min_terminated_length": 621.0, + "epoch": 0.2860715178794699, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.094978189830923, + "kl": 0.012298583984375, + "learning_rate": 9.086267870616338e-07, + "loss": -0.0472, + "num_tokens": 51569834.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0633715391159058, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049351484138746717, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09223657356645668, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1314.125, + "completions/mean_terminated_length": 1128.25, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.28632158039509875, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4248180616676875, + "kl": 0.004596710205078125, + "learning_rate": 9.08389375704571e-07, + "loss": 0.013, + "num_tokens": 51621196.0, + "reward": 0.0, + "reward_std": 1.0518419742584229, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08719083227128777, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1988408473937571, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1179.875, + "completions/mean_terminated_length": 1179.875, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.2865716429107277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3907190285105537, + "kl": 0.010284423828125, + "learning_rate": 9.081516912667179e-07, + "loss": 0.0217, + "num_tokens": 51664738.0, + "reward": 0.0, + "reward_std": 0.6824827194213867, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11111620513571926, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09960960674526334, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 960.875, + "completions/mean_terminated_length": 960.875, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.2868217054263566, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5103170323985697, + "kl": 0.0073699951171875, + "learning_rate": 9.079137339291819e-07, + "loss": -0.0592, + "num_tokens": 51702648.0, + "reward": 0.0, + "reward_std": 0.9124757051467896, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0356481458830919, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11027755559282876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1338.875, + "completions/mean_terminated_length": 1177.75, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.2870717679419855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.012937352336149, + "kl": 0.009246826171875, + "learning_rate": 9.076755038732788e-07, + "loss": 0.014, + "num_tokens": 51767598.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8595066070556641, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10557760474524706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15592312652115176, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15244914148902494, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 952.875, + "completions/mean_terminated_length": 952.875, + "completions/min_length": 540.0, + "completions/min_terminated_length": 540.0, + "epoch": 0.2873218304576144, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9155644551474222, + "kl": 0.012115478515625, + "learning_rate": 9.074370012805318e-07, + "loss": -0.0463, + "num_tokens": 51797204.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0564980506896973, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05333010497994098, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1702127126204245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1267.375, + "completions/mean_terminated_length": 1127.800048828125, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.2875718929732433, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.031466613137433, + "kl": 0.0084228515625, + "learning_rate": 9.071982263326721e-07, + "loss": 0.0319, + "num_tokens": 51849410.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6525626182556152, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.061791293121656876, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04643778642742197, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12619796324000607, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1173.0625, + "completions/mean_terminated_length": 1151.2667236328125, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.28782195548887224, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.725510517807379, + "kl": 0.0093231201171875, + "learning_rate": 9.069591792116383e-07, + "loss": -0.0223, + "num_tokens": 51891627.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8372215628623962, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07831893060196916, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06369840984207098, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1440.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1234.0625, + "completions/mean_terminated_length": 1234.0625, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.2880720180045011, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5583034611140243, + "kl": 0.011962890625, + "learning_rate": 9.067198600995761e-07, + "loss": -0.0484, + "num_tokens": 51937988.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9127085208892822, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017377289229569257, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05879137242457128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 924.6875, + "completions/mean_terminated_length": 886.3333740234375, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.28832208052013003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8989277791254975, + "kl": 0.0108489990234375, + "learning_rate": 9.064802691788391e-07, + "loss": -0.0035, + "num_tokens": 51978783.0, + "reward": 0.0, + "reward_std": 0.9330646991729736, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0029597213317003887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03605527707978987, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1284.1875, + "completions/mean_terminated_length": 1212.25, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.28857214303575895, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.878163242350567, + "kl": 0.009429931640625, + "learning_rate": 9.062404066319871e-07, + "loss": 0.0362, + "num_tokens": 52015450.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0652692317962646, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12857569142677366, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09700232629386614, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1065.25, + "completions/mean_terminated_length": 1065.25, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.2888222055513878, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5232242223945285, + "kl": 0.0107879638671875, + "learning_rate": 9.060002726417878e-07, + "loss": 0.0036, + "num_tokens": 52065438.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.73051917552948, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07001855123974167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07999685498146625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1358.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1063.8125, + "completions/mean_terminated_length": 1063.8125, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.28907226806701675, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8507513693606055, + "kl": 0.0117950439453125, + "learning_rate": 9.057598673912154e-07, + "loss": 0.0258, + "num_tokens": 52115843.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9757126569747925, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.4975416072617711, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.326935269350736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1113.8125, + "completions/mean_terminated_length": 1088.0667724609375, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.28932233058264567, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1927071209553355, + "kl": 0.00844573974609375, + "learning_rate": 9.055191910634502e-07, + "loss": -0.0239, + "num_tokens": 52156136.0, + "reward": 0.0, + "reward_std": 0.8162573575973511, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014300015978543188, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027408880329011922, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1090.0, + "completions/mean_length": 911.125, + "completions/mean_terminated_length": 871.86669921875, + "completions/min_length": 552.0, + "completions/min_terminated_length": 552.0, + "epoch": 0.2895723930982746, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9142843905374596, + "kl": 0.01055908203125, + "learning_rate": 9.0527824384188e-07, + "loss": -0.0403, + "num_tokens": 52188442.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.577366828918457, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.30539144563682785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.37541708634394955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1396.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1232.25, + "completions/mean_terminated_length": 1232.25, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.28982245561390346, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.876635667618434, + "kl": 0.00730133056640625, + "learning_rate": 9.050370259100984e-07, + "loss": -0.0332, + "num_tokens": 52228054.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0624054670333862, + "rewards/wordcountpos_reward_GEOBench/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017443097710736488, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042844784544347196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 1222.9375, + "completions/mean_terminated_length": 1056.7000732421875, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.2900725181295324, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.050044118451372, + "kl": 0.0088348388671875, + "learning_rate": 9.047955374519056e-07, + "loss": -0.0022, + "num_tokens": 52281133.0, + "reward": 0.0, + "reward_std": 0.8031957149505615, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.051549455589344005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1350012648511468, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1548595540529595, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1200.4375, + "completions/mean_terminated_length": 1200.4375, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.2903225806451613, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0665931996884597, + "kl": 0.0085601806640625, + "learning_rate": 9.045537786513076e-07, + "loss": -0.0209, + "num_tokens": 52321164.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9438577890396118, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02496364556658468, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07189871551740144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1181.0, + "completions/mean_terminated_length": 1181.0, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.2905726431607902, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.595106958398425, + "kl": 0.00569915771484375, + "learning_rate": 9.043117496925167e-07, + "loss": -0.0123, + "num_tokens": 52362820.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9786962866783142, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02456438229006036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09665345022309146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1122.25, + "completions/mean_terminated_length": 1122.25, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.2908227056764191, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.353880579522538, + "kl": 0.011627197265625, + "learning_rate": 9.040694507599507e-07, + "loss": -0.0198, + "num_tokens": 52400952.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9002624750137329, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03243353593879787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04443911426150871, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1147.0, + "completions/max_terminated_length": 1147.0, + "completions/mean_length": 1028.625, + "completions/mean_terminated_length": 1028.625, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.291072768192048, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.023969889504414, + "kl": 0.00582122802734375, + "learning_rate": 9.038268820382336e-07, + "loss": 0.0053, + "num_tokens": 52445106.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8869227170944214, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04135523165665398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05604438975967207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1273.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 1074.5625, + "completions/mean_terminated_length": 1074.5625, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.2913228307076769, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8190731643431945, + "kl": 0.0115203857421875, + "learning_rate": 9.035840437121947e-07, + "loss": -0.0215, + "num_tokens": 52486619.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0145660638809204, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05338055891452379, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12187113890865295, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033236, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1237.3125, + "completions/mean_terminated_length": 1176.6923828125, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.2915728932233058, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.944428468808221, + "kl": 0.0108795166015625, + "learning_rate": 9.033409359668685e-07, + "loss": -0.0347, + "num_tokens": 52535568.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9597766399383545, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044156361023542645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05054099706218258, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1198.875, + "completions/mean_terminated_length": 1098.5, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.29182295573893474, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.57883175858149, + "kl": 0.008209228515625, + "learning_rate": 9.03097558987495e-07, + "loss": -0.0452, + "num_tokens": 52591262.0, + "reward": 0.0, + "reward_std": 0.432937890291214, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15447941466886755, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19422137252890803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1393.625, + "completions/mean_terminated_length": 1329.800048828125, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.29207301825456367, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9716400786699015, + "kl": 0.00492095947265625, + "learning_rate": 9.028539129595197e-07, + "loss": -0.0025, + "num_tokens": 52635960.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.920956015586853, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1112.75, + "completions/mean_terminated_length": 1086.933349609375, + "completions/min_length": 575.0, + "completions/min_terminated_length": 575.0, + "epoch": 0.29232308077019253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.145876474900216, + "kl": 0.008941650390625, + "learning_rate": 9.026099980685925e-07, + "loss": 0.0189, + "num_tokens": 52669604.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8170521259307861, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008156799774915383, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.014244678372584705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 984.0, + "completions/mean_terminated_length": 984.0, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.29257314328582146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.741715016794595, + "kl": 0.0117950439453125, + "learning_rate": 9.023658145005685e-07, + "loss": -0.0585, + "num_tokens": 52706956.0, + "reward": 0.0, + "reward_std": 0.8073769211769104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08463783131690501, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08372611507458337, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19007795671678931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1166.1875, + "completions/mean_terminated_length": 1143.933349609375, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.2928232058014504, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.705689683882935, + "kl": 0.011077880859375, + "learning_rate": 9.021213624415076e-07, + "loss": -0.0221, + "num_tokens": 52756615.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9123117327690125, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033152982809440006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03859690794942738, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1153.1875, + "completions/mean_terminated_length": 1103.6429443359375, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.29307326831707925, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.332837939677952, + "kl": 0.01019287109375, + "learning_rate": 9.018766420776738e-07, + "loss": 0.0332, + "num_tokens": 52807010.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0613384246826172, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09478364672653038, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13402401912588144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1333.375, + "completions/mean_terminated_length": 1203.77783203125, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.2933233308327082, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8913268426707046, + "kl": 0.012054443359375, + "learning_rate": 9.016316535955363e-07, + "loss": -0.0315, + "num_tokens": 52871248.0, + "reward": 0.0, + "reward_std": 0.8631778955459595, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1087.625, + "completions/mean_terminated_length": 1087.625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.2935733933483371, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.635565765386067, + "kl": 0.00804901123046875, + "learning_rate": 9.013863971817681e-07, + "loss": 0.0016, + "num_tokens": 52919266.0, + "reward": 0.0, + "reward_std": 0.8623284697532654, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006341604415750612, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09653306798383536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1181.0, + "completions/max_terminated_length": 1181.0, + "completions/mean_length": 967.1875, + "completions/mean_terminated_length": 967.1875, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.29382345586396597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5939941253651977, + "kl": 0.01055908203125, + "learning_rate": 9.01140873023246e-07, + "loss": 0.0088, + "num_tokens": 52948213.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0492569208145142, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03748124636595282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07900367717513548, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792296, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1340.5625, + "completions/mean_terminated_length": 1268.0909423828125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.2940735183795949, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1595826169175236, + "kl": 0.0112762451171875, + "learning_rate": 9.008950813070518e-07, + "loss": -0.0134, + "num_tokens": 53009102.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9196075201034546, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.038371711354889634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06861130011468775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1049.25, + "completions/mean_terminated_length": 1049.25, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.2943235808952238, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6412030041707686, + "kl": 0.0137786865234375, + "learning_rate": 9.006490222204703e-07, + "loss": 0.0161, + "num_tokens": 53061106.0, + "reward": 0.0, + "reward_std": 0.6455578804016113, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12825604884873848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13713888767378207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1079.1875, + "completions/mean_terminated_length": 1079.1875, + "completions/min_length": 635.0, + "completions/min_terminated_length": 635.0, + "epoch": 0.29457364341085274, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3555707918830997, + "kl": 0.0111236572265625, + "learning_rate": 9.004026959509905e-07, + "loss": 0.0087, + "num_tokens": 53102829.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.039689064025879, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00778735387413233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05462988591273973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1239.5, + "completions/mean_terminated_length": 1202.2857666015625, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.2948237059264816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9186009356992795, + "kl": 0.008514404296875, + "learning_rate": 9.001561026863047e-07, + "loss": 0.0162, + "num_tokens": 53140549.0, + "reward": 0.0, + "reward_std": 0.5740969181060791, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.060030020312083744, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12329823247575154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147857, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1323.5, + "completions/mean_terminated_length": 1147.0, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.29507376844211053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.018040668533999, + "kl": 0.0089874267578125, + "learning_rate": 8.999092426143089e-07, + "loss": -0.017, + "num_tokens": 53190253.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9633491039276123, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1163739428737563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10428153325483963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1153.9375, + "completions/mean_terminated_length": 1153.9375, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.29532383095773945, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.732941384173861, + "kl": 0.007354736328125, + "learning_rate": 8.996621159231022e-07, + "loss": -0.0019, + "num_tokens": 53231156.0, + "reward": 0.0, + "reward_std": 0.8251171708106995, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018747217671694905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13705157445373106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1142.875, + "completions/mean_terminated_length": 1142.875, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.2955738934733683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3214197486401, + "kl": 0.0066070556640625, + "learning_rate": 8.994147228009867e-07, + "loss": -0.0059, + "num_tokens": 53265938.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0621826648712158, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0030477359656461197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07961221914657131, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1092.5, + "completions/mean_terminated_length": 1065.3333740234375, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.29582395598899724, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9387731094579532, + "kl": 0.00888824462890625, + "learning_rate": 8.991670634364681e-07, + "loss": -0.0289, + "num_tokens": 53321242.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.535271167755127, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06302658834309166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07487570415541867, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1014.375, + "completions/mean_terminated_length": 1014.375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.29607401850462617, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8514241129501876, + "kl": 0.0081634521484375, + "learning_rate": 8.989191380182545e-07, + "loss": 0.0376, + "num_tokens": 53366904.0, + "reward": 0.0, + "reward_std": 0.7303670644760132, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11730321206251672, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1935617976134636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1217.8125, + "completions/mean_terminated_length": 1177.5, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.29632408102025504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4415554021570327, + "kl": 0.0127716064453125, + "learning_rate": 8.986709467352568e-07, + "loss": -0.055, + "num_tokens": 53411517.0, + "reward": 0.0, + "reward_std": 0.9080904126167297, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013141879362435854, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024110053257729826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 992.25, + "completions/mean_terminated_length": 992.25, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.29657414353588396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.085083520712462, + "kl": 0.0086517333984375, + "learning_rate": 8.984224897765884e-07, + "loss": -0.0557, + "num_tokens": 53448313.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9266807436943054, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014269688148107574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026368284456262353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1150.0, + "completions/mean_length": 1450.75, + "completions/mean_terminated_length": 1106.0, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.2968242060515129, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3640575578036063, + "kl": 0.00616455078125, + "learning_rate": 8.981737673315655e-07, + "loss": 0.0329, + "num_tokens": 53509269.0, + "reward": 0.0, + "reward_std": 0.913100004196167, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032472426082544026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03480654400243118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1023.625, + "completions/mean_terminated_length": 1023.625, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.2970742685671418, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0863880774445347, + "kl": 0.00946807861328125, + "learning_rate": 8.979247795897059e-07, + "loss": -0.0373, + "num_tokens": 53537967.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0143927335739136, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04040122698545328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15855404773034618, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1134.9375, + "completions/mean_terminated_length": 1110.60009765625, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.2973243310827707, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.451765422071213, + "kl": 0.013397216796875, + "learning_rate": 8.976755267407306e-07, + "loss": 0.0021, + "num_tokens": 53589350.0, + "reward": 0.0, + "reward_std": 0.8723480105400085, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020637533894191408, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0670003962207655, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16329931618554522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 1262.8125, + "completions/mean_terminated_length": 1025.625, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.2975743935983996, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9967317518491714, + "kl": 0.00624847412109375, + "learning_rate": 8.974260089745614e-07, + "loss": -0.0297, + "num_tokens": 53644075.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.054693579673767, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.053398331328712224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09639647340215667, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04013864859597436, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1409.8125, + "completions/mean_terminated_length": 1211.4000244140625, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.2978244561140285, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7973178052848144, + "kl": 0.005916595458984375, + "learning_rate": 8.971762264813228e-07, + "loss": -0.0246, + "num_tokens": 53695624.0, + "reward": 0.0, + "reward_std": 0.8682726621627808, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005747756177443989, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07185455743910749, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1236.125, + "completions/mean_terminated_length": 1218.533447265625, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.2980745186296574, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4396693054470457, + "kl": 0.006744384765625, + "learning_rate": 8.969261794513408e-07, + "loss": -0.0402, + "num_tokens": 53749594.0, + "reward": 0.0, + "reward_std": 0.8011016845703125, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14331181028720963, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08409620487075355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1153.5625, + "completions/mean_terminated_length": 1153.5625, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.2983245811452863, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.640033088552631, + "kl": 0.01139068603515625, + "learning_rate": 8.966758680751427e-07, + "loss": -0.0151, + "num_tokens": 53785235.0, + "reward": 0.0, + "reward_std": 0.9230592250823975, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013115530388445193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05480608373726796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1320.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1056.75, + "completions/mean_terminated_length": 1056.75, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.29857464366091524, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3924814461556543, + "kl": 0.0111083984375, + "learning_rate": 8.964252925434578e-07, + "loss": -0.0025, + "num_tokens": 53820671.0, + "reward": 0.0, + "reward_std": 0.8826729655265808, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011701921674307132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07154462130872898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1091.4375, + "completions/mean_terminated_length": 1064.2000732421875, + "completions/min_length": 530.0, + "completions/min_terminated_length": 530.0, + "epoch": 0.29882470617654416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5563506727488305, + "kl": 0.01004791259765625, + "learning_rate": 8.96174453047216e-07, + "loss": -0.0731, + "num_tokens": 53855806.0, + "reward": 0.0, + "reward_std": 0.44092732667922974, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03739526868946356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05843159862911866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1465024333004847, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1238.8125, + "completions/mean_terminated_length": 1151.75, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.29907476869217303, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.961294879580483, + "kl": 0.01025390625, + "learning_rate": 8.959233497775489e-07, + "loss": 0.0061, + "num_tokens": 53910795.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0181138515472412, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09929470218510253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1263994683460607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1218.6875, + "completions/mean_terminated_length": 1153.769287109375, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.29932483120780196, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5871776160240243, + "kl": 0.00923919677734375, + "learning_rate": 8.956719829257888e-07, + "loss": -0.0498, + "num_tokens": 53961150.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9816817045211792, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03429008241831853, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10332728299090464, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1071.0, + "completions/max_terminated_length": 1071.0, + "completions/mean_length": 860.3125, + "completions/mean_terminated_length": 860.3125, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.2995748937234309, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3003345721031243, + "kl": 0.0068511962890625, + "learning_rate": 8.954203526834686e-07, + "loss": 0.0192, + "num_tokens": 53987547.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0121970176696777, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018933210527773574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027060488136229046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 1106.4375, + "completions/mean_terminated_length": 1015.6154174804688, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.29982495623905975, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9603023546953895, + "kl": 0.007415771484375, + "learning_rate": 8.951684592423226e-07, + "loss": -0.0342, + "num_tokens": 54030354.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7926464676856995, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015025014658913491, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.168857226169081, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1429.625, + "completions/mean_terminated_length": 1274.800048828125, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.30007501875468867, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9569518284747662, + "kl": 0.01055908203125, + "learning_rate": 8.949163027942849e-07, + "loss": -0.0319, + "num_tokens": 54088124.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0267547369003296, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10920012930723197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11618866076186583, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1047.875, + "completions/mean_terminated_length": 1017.7333984375, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.3003250812703176, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.585102121791689, + "kl": 0.01055908203125, + "learning_rate": 8.946638835314905e-07, + "loss": 0.0173, + "num_tokens": 54134834.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9285773038864136, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009369891471875176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.021302853522128545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1266.25, + "completions/mean_terminated_length": 1084.4444580078125, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.30057514378594646, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7324062140868404, + "kl": 0.00998687744140625, + "learning_rate": 8.944112016462746e-07, + "loss": -0.0207, + "num_tokens": 54184654.0, + "reward": 0.0, + "reward_std": 0.9125763177871704, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0045885899779005744, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.018354359911602298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1027.25, + "completions/mean_terminated_length": 1027.25, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.3008252063015754, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.868664645662487, + "kl": 0.00757598876953125, + "learning_rate": 8.94158257331172e-07, + "loss": -0.0299, + "num_tokens": 54231898.0, + "reward": 0.0, + "reward_std": 0.8390709161758423, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019429508188049376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11693683093518384, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1222.125, + "completions/mean_terminated_length": 1158.0, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.3010752688172043, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2196545536545775, + "kl": 0.0105438232421875, + "learning_rate": 8.939050507789182e-07, + "loss": 0.0078, + "num_tokens": 54278044.0, + "reward": 0.0, + "reward_std": 1.0197193622589111, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06121991148165406, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24378376404331942, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 959.0625, + "completions/mean_terminated_length": 881.7857666015625, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.30132533133283324, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.926495822501635, + "kl": 0.011016845703125, + "learning_rate": 8.936515821824481e-07, + "loss": 0.0076, + "num_tokens": 54324845.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0414206981658936, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02180952955388306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.040226567680777536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16843506277010845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1185.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 958.875, + "completions/mean_terminated_length": 958.875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.3015753938484621, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.181858439099294, + "kl": 0.010162353515625, + "learning_rate": 8.933978517348962e-07, + "loss": -0.0186, + "num_tokens": 54368699.0, + "reward": 0.0, + "reward_std": 0.9993492960929871, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10588658912652821, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11806630352178385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1179.875, + "completions/mean_terminated_length": 1034.3636474609375, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.301825456364091, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.93710430803906, + "kl": 0.009246826171875, + "learning_rate": 8.93143859629597e-07, + "loss": 0.0302, + "num_tokens": 54402977.0, + "reward": 0.0, + "reward_std": 1.065537691116333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09129711999701984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07499042139611237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1106.3125, + "completions/mean_terminated_length": 1080.0667724609375, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.30207551887971995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4120995948632666, + "kl": 0.0113372802734375, + "learning_rate": 8.928896060600837e-07, + "loss": -0.041, + "num_tokens": 54440870.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7513419389724731, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010618214985539208, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11997398060465035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1326.4375, + "completions/mean_terminated_length": 1301.6429443359375, + "completions/min_length": 1143.0, + "completions/min_terminated_length": 1143.0, + "epoch": 0.3023255813953488, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0364700463289895, + "kl": 0.005474090576171875, + "learning_rate": 8.926350912200893e-07, + "loss": -0.0161, + "num_tokens": 54483997.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8850369453430176, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.051546420159771345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10197149031631914, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1199.75, + "completions/mean_terminated_length": 1199.75, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.30257564391097774, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1726141508128176, + "kl": 0.012603759765625, + "learning_rate": 8.923803153035454e-07, + "loss": 0.0094, + "num_tokens": 54520561.0, + "reward": -5.21540641784668e-08, + "reward_std": 1.0656747817993164, + "rewards/wordcountpos_reward_GEOBench/mean": -5.21540641784668e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021676371828643477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09388592360497452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1354.875, + "completions/mean_terminated_length": 1209.75, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.30282570642660667, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0841555049741975, + "kl": 0.009979248046875, + "learning_rate": 8.921252785045829e-07, + "loss": 0.0345, + "num_tokens": 54577391.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0585405826568604, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08129615819359012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09801478063226564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05708992257184505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1275.9375, + "completions/mean_terminated_length": 1243.9285888671875, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.30307576894223553, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7492521280187607, + "kl": 0.00843048095703125, + "learning_rate": 8.918699810175316e-07, + "loss": -0.0312, + "num_tokens": 54629910.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9854578971862793, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00020549252228716126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05704246240055369, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1033.5, + "completions/mean_terminated_length": 1002.4000244140625, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.30332583145786446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.487022402053469, + "kl": 0.004119873046875, + "learning_rate": 8.916144230369195e-07, + "loss": -0.0453, + "num_tokens": 54670998.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0087707042694092, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06126105746403688, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07717793028597648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1197.4375, + "completions/mean_terminated_length": 1177.2667236328125, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.3035758939734934, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0519825561853984, + "kl": 0.010498046875, + "learning_rate": 8.913586047574729e-07, + "loss": -0.035, + "num_tokens": 54719157.0, + "reward": 0.0, + "reward_std": 0.9786341786384583, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01011327914530728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02824625279296795, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1285.6875, + "completions/mean_terminated_length": 1214.25, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.3038259564891223, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0364768531039505, + "kl": 0.0097503662109375, + "learning_rate": 8.911025263741175e-07, + "loss": -0.0799, + "num_tokens": 54774688.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7972286939620972, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12205120001678034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1465697155282773, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337807, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1276.9375, + "completions/mean_terminated_length": 1245.071533203125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.3040760190047512, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1900244848341495, + "kl": 0.004283905029296875, + "learning_rate": 8.908461880819763e-07, + "loss": 0.0216, + "num_tokens": 54819935.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9602987766265869, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05107195556204741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0557609211443386, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1148.375, + "completions/mean_terminated_length": 1098.1429443359375, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.3043260815203801, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.786511652234718, + "kl": 0.00843048095703125, + "learning_rate": 8.905895900763702e-07, + "loss": 0.0189, + "num_tokens": 54862365.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6400191783905029, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1274396327878428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20789577214009938, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1191.0, + "completions/max_terminated_length": 1191.0, + "completions/mean_length": 923.625, + "completions/mean_terminated_length": 923.625, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.304576144036009, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.226147142335819, + "kl": 0.0128936767578125, + "learning_rate": 8.903327325528187e-07, + "loss": -0.0593, + "num_tokens": 54888151.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0008233785629272, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024532607679943287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02380972872930064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1163.0, + "completions/max_terminated_length": 1163.0, + "completions/mean_length": 955.5625, + "completions/mean_terminated_length": 955.5625, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.3048262065516379, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.376653291108612, + "kl": 0.00946044921875, + "learning_rate": 8.900756157070389e-07, + "loss": 0.0061, + "num_tokens": 54929264.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.595533013343811, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02403413816252718, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1339879964158582, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1605545943838973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1168.4375, + "completions/mean_terminated_length": 1146.3333740234375, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.3050762690672668, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.029639071224109, + "kl": 0.00917816162109375, + "learning_rate": 8.898182397349447e-07, + "loss": -0.0206, + "num_tokens": 54977551.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.830763578414917, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028814836220515547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03872268356333447, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1011.8125, + "completions/mean_terminated_length": 979.2667236328125, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.30532633158289574, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.147841537457434, + "kl": 0.00923919677734375, + "learning_rate": 8.895606048326483e-07, + "loss": 0.0026, + "num_tokens": 55015804.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8816462755203247, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.071511745118158, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08393456695340762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 1082.125, + "completions/mean_terminated_length": 1054.2667236328125, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.3055763940985246, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.494972290758228, + "kl": 0.01239013671875, + "learning_rate": 8.89302711196459e-07, + "loss": -0.0072, + "num_tokens": 55070038.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.013606071472168, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007197076811300727, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05484332223715817, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1251.75, + "completions/mean_terminated_length": 1138.9091796875, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.30582645661415353, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8053706720058322, + "kl": 0.01043701171875, + "learning_rate": 8.890445590228832e-07, + "loss": 0.0136, + "num_tokens": 55116410.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8541104793548584, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04498156882895489, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13760166214134273, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14504150108516198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1216.5625, + "completions/mean_terminated_length": 1216.5625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.30607651912978245, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1687908361404755, + "kl": 0.01065826416015625, + "learning_rate": 8.887861485086244e-07, + "loss": 0.0041, + "num_tokens": 55166315.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9600927829742432, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0957494185292262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12264245884733233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1157.0, + "completions/max_terminated_length": 1157.0, + "completions/mean_length": 977.4375, + "completions/mean_terminated_length": 977.4375, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.3063265816454114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2660825369495896, + "kl": 0.012451171875, + "learning_rate": 8.885274798505824e-07, + "loss": -0.0018, + "num_tokens": 55201474.0, + "reward": 0.0, + "reward_std": 0.874707818031311, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12021092289883042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08284530322627226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1092.0625, + "completions/mean_terminated_length": 956.0833740234375, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.30657664416104025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0894594969171645, + "kl": 0.0107421875, + "learning_rate": 8.882685532458544e-07, + "loss": -0.0035, + "num_tokens": 55246947.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0428954362869263, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005923734275099145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08610851472946286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1166666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1213.1875, + "completions/mean_terminated_length": 1213.1875, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.30682670667666917, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3458055800842863, + "kl": 0.01470947265625, + "learning_rate": 8.880093688917338e-07, + "loss": 0.0315, + "num_tokens": 55299590.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.05448579788208, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22044342801292865, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16613047954539695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 950.375, + "completions/mean_terminated_length": 950.375, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.3070767691922981, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2973413562821765, + "kl": 0.011043548583984375, + "learning_rate": 8.877499269857107e-07, + "loss": 0.0065, + "num_tokens": 55340548.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8163482546806335, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0804738741514042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11917843590648217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1276.0, + "completions/mean_terminated_length": 1261.0667724609375, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.30732683170792696, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8503118678932124, + "kl": 0.0106658935546875, + "learning_rate": 8.874902277254708e-07, + "loss": -0.0053, + "num_tokens": 55392332.0, + "reward": 0.0, + "reward_std": 1.0427963733673096, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04649934625011029, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06852049910383065, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1061.0, + "completions/mean_length": 1254.6875, + "completions/mean_terminated_length": 939.2857666015625, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.3075768942235559, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.294895546477739, + "kl": 0.0108642578125, + "learning_rate": 8.872302713088965e-07, + "loss": 0.0315, + "num_tokens": 55444567.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0033539533615112, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1124.375, + "completions/mean_terminated_length": 1070.71435546875, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.3078269567391848, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2111852087639585, + "kl": 0.00618743896484375, + "learning_rate": 8.86970057934066e-07, + "loss": 0.0037, + "num_tokens": 55484085.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.039217472076416, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005134002981750508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056400174910446826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1009.1875, + "completions/mean_terminated_length": 845.5833740234375, + "completions/min_length": 487.0, + "completions/min_terminated_length": 487.0, + "epoch": 0.3080770192548137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.22018203282512, + "kl": 0.0113983154296875, + "learning_rate": 8.867095877992532e-07, + "loss": -0.1055, + "num_tokens": 55525888.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6765516400337219, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08574959980050728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07906302473709823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 942.625, + "completions/mean_terminated_length": 942.625, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.3083270817704426, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7915820220475847, + "kl": 0.0117645263671875, + "learning_rate": 8.864488611029277e-07, + "loss": -0.0284, + "num_tokens": 55565722.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9785919189453125, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04460261247001934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04748274161864952, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1114.25, + "completions/mean_terminated_length": 1114.25, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.3085771442860715, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.940629920717229, + "kl": 0.0100555419921875, + "learning_rate": 8.861878780437545e-07, + "loss": -0.0676, + "num_tokens": 55616742.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0044103860855103, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1121576283367444, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06834653393045816, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1031.5, + "completions/mean_terminated_length": 1031.5, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.30882720680170045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8566802862889533, + "kl": 0.0101318359375, + "learning_rate": 8.85926638820594e-07, + "loss": -0.0018, + "num_tokens": 55657094.0, + "reward": 0.0, + "reward_std": 0.9838023781776428, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0008736604543214724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05866043859271723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1047.0, + "completions/mean_terminated_length": 982.2857666015625, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.3090772693173293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3649651661277042, + "kl": 0.0107574462890625, + "learning_rate": 8.85665143632502e-07, + "loss": -0.1279, + "num_tokens": 55712406.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9223273396492004, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003308683473685998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17810510794073103, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1133.1875, + "completions/mean_terminated_length": 1108.7333984375, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.30932733183295824, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5608345712846075, + "kl": 0.0144500732421875, + "learning_rate": 8.85403392678729e-07, + "loss": -0.0592, + "num_tokens": 55757961.0, + "reward": 0.0, + "reward_std": 1.0014978647232056, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06379017661079413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07108468216180186, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1190.25, + "completions/mean_terminated_length": 1118.769287109375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.30957739434858716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.293290229932778, + "kl": 0.010162353515625, + "learning_rate": 8.851413861587205e-07, + "loss": 0.0234, + "num_tokens": 55802853.0, + "reward": 0.0, + "reward_std": 0.6172600388526917, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1535597350929415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19998658832380345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1231.375, + "completions/mean_terminated_length": 1213.4666748046875, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.30982745686421603, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.201998407812096, + "kl": 0.014434814453125, + "learning_rate": 8.848791242721166e-07, + "loss": -0.0458, + "num_tokens": 55849547.0, + "reward": 0.0, + "reward_std": 0.8060121536254883, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08564798036641437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07915268399442278, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1467.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1089.75, + "completions/mean_terminated_length": 1089.75, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.31007751937984496, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8197751527432406, + "kl": 0.00925445556640625, + "learning_rate": 8.846166072187526e-07, + "loss": -0.054, + "num_tokens": 55893831.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9972229599952698, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009246130732879583, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07998820767002952, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1136.25, + "completions/mean_terminated_length": 918.0, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.3103275818954739, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.299042588801749, + "kl": 0.00504302978515625, + "learning_rate": 8.843538351986574e-07, + "loss": -0.062, + "num_tokens": 55942763.0, + "reward": 0.0, + "reward_std": 0.6083270311355591, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08468701335132282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18382478697275648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1335.6875, + "completions/mean_terminated_length": 1297.769287109375, + "completions/min_length": 1135.0, + "completions/min_terminated_length": 1135.0, + "epoch": 0.31057764441110275, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9420226217716405, + "kl": 0.0111083984375, + "learning_rate": 8.840908084120548e-07, + "loss": -0.012, + "num_tokens": 56000406.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9069857597351074, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017203316232785433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16662432899817503, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1122.625, + "completions/mean_terminated_length": 1122.625, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.3108277069267317, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9572784984516542, + "kl": 0.009796142578125, + "learning_rate": 8.83827527059362e-07, + "loss": -0.0366, + "num_tokens": 56049224.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6465604901313782, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.268851119625728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3763177778278091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.050000000000000024, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1156.3125, + "completions/mean_terminated_length": 1156.3125, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.3110777694423606, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9017456075164874, + "kl": 0.01031494140625, + "learning_rate": 8.83563991341191e-07, + "loss": -0.0036, + "num_tokens": 56092477.0, + "reward": 0.0, + "reward_std": 0.9222713112831116, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03667383255845325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1928892987149647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.103905227473387, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1360.375, + "completions/mean_terminated_length": 1180.857177734375, + "completions/min_length": 973.0, + "completions/min_terminated_length": 973.0, + "epoch": 0.3113278319579895, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.354089052831367, + "kl": 0.0131072998046875, + "learning_rate": 8.833002014583474e-07, + "loss": 0.0443, + "num_tokens": 56151731.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9580180048942566, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06556900482076057, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056933175905587705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 967.1875, + "completions/mean_terminated_length": 967.1875, + "completions/min_length": 436.0, + "completions/min_terminated_length": 436.0, + "epoch": 0.3115778944736184, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8996603584152756, + "kl": 0.0145263671875, + "learning_rate": 8.830361576118299e-07, + "loss": 0.0758, + "num_tokens": 56186222.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0592573881149292, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10262681537358839, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0397963478975493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033236, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1068.875, + "completions/mean_terminated_length": 1068.875, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.3118279569892473, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3221854165973808, + "kl": 0.00533294677734375, + "learning_rate": 8.827718600028316e-07, + "loss": -0.0084, + "num_tokens": 56241284.0, + "reward": 0.0, + "reward_std": 0.7910306453704834, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04965239950502916, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13191640623793585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19167874358086956, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1174.9375, + "completions/mean_terminated_length": 1153.2667236328125, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.31207801950487624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.89681169338756, + "kl": 0.011962890625, + "learning_rate": 8.825073088327381e-07, + "loss": -0.0299, + "num_tokens": 56294387.0, + "reward": 0.0, + "reward_std": 0.9353560209274292, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10853887287107829, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05983775834161839, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1233.5, + "completions/mean_terminated_length": 1215.7333984375, + "completions/min_length": 1089.0, + "completions/min_terminated_length": 1089.0, + "epoch": 0.3123280820205051, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7175664994138726, + "kl": 0.00714111328125, + "learning_rate": 8.822425043031289e-07, + "loss": -0.0243, + "num_tokens": 56333731.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.996291995048523, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07847524622094541, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08970564438492995, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04216370213557838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1387.5, + "completions/mean_terminated_length": 1200.0, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.31257814453613403, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2098839933446452, + "kl": 0.00756072998046875, + "learning_rate": 8.819774466157759e-07, + "loss": -0.0303, + "num_tokens": 56384451.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4469760060310364, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022445893832122, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16513509321708697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13924399049470285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1239.5625, + "completions/mean_terminated_length": 979.125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.31282820705176295, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.027013036177877, + "kl": 0.004852294921875, + "learning_rate": 8.817121359726446e-07, + "loss": -0.0026, + "num_tokens": 56428676.0, + "reward": 0.0, + "reward_std": 0.8689641356468201, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12801869890037168, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06526712431015082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1197.6875, + "completions/mean_terminated_length": 1197.6875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.3130782695673918, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9070087754385407, + "kl": 0.00925445556640625, + "learning_rate": 8.814465725758926e-07, + "loss": -0.0222, + "num_tokens": 56472359.0, + "reward": 0.0, + "reward_std": 0.6609387397766113, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04936592898901547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06796801519008522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1115.0, + "completions/max_terminated_length": 1115.0, + "completions/mean_length": 892.1875, + "completions/mean_terminated_length": 892.1875, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.31332833208302074, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7472531574253014, + "kl": 0.00726318359375, + "learning_rate": 8.811807566278706e-07, + "loss": -0.0007, + "num_tokens": 56512754.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4298554062843323, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0026100778494819892, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.010440311397927957, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1219.5625, + "completions/mean_terminated_length": 1051.300048828125, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.31357839459864967, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.159175507976226, + "kl": 0.0109405517578125, + "learning_rate": 8.809146883311214e-07, + "loss": -0.0209, + "num_tokens": 56569619.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.797643780708313, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05326030073182301, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06605919503658766, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1460.4375, + "completions/mean_terminated_length": 1373.4000244140625, + "completions/min_length": 1188.0, + "completions/min_terminated_length": 1188.0, + "epoch": 0.3138284571142786, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5428689251054135, + "kl": 0.0088653564453125, + "learning_rate": 8.806483678883801e-07, + "loss": 0.0093, + "num_tokens": 56638282.0, + "reward": 0.0, + "reward_std": 0.7582228183746338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06602053474481864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15039302924050416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689055, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 962.875, + "completions/mean_terminated_length": 962.875, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.31407851962990746, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.12588378912093, + "kl": 0.009571075439453125, + "learning_rate": 8.803817955025745e-07, + "loss": -0.0146, + "num_tokens": 56676184.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.38857656717300415, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012112143684433823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12426922922370705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1455.5, + "completions/mean_terminated_length": 1357.5999755859375, + "completions/min_length": 1265.0, + "completions/min_terminated_length": 1265.0, + "epoch": 0.3143285821455364, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6161046461019732, + "kl": 0.010528564453125, + "learning_rate": 8.801149713768233e-07, + "loss": -0.0006, + "num_tokens": 56739672.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.047508716583252, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003657410331074391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09724232448041892, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1153.0, + "completions/mean_length": 1178.25, + "completions/mean_terminated_length": 928.0, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.3145786446611653, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1981102463829254, + "kl": 0.0115966796875, + "learning_rate": 8.798478957144379e-07, + "loss": -0.0375, + "num_tokens": 56786236.0, + "reward": 0.0, + "reward_std": 0.8982056379318237, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0005423209111228636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046623943992779256, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1144.0, + "completions/mean_length": 1256.5, + "completions/mean_terminated_length": 1013.0, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.3148287071767942, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6581807868246525, + "kl": 0.004940032958984375, + "learning_rate": 8.79580568718921e-07, + "loss": -0.0159, + "num_tokens": 56835980.0, + "reward": 0.0, + "reward_std": 0.8632340431213379, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01098221254189654, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04898144577774337, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 918.125, + "completions/mean_terminated_length": 918.125, + "completions/min_length": 581.0, + "completions/min_terminated_length": 581.0, + "epoch": 0.3150787696924231, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1012666233701007, + "kl": 0.00881195068359375, + "learning_rate": 8.793129905939669e-07, + "loss": -0.0307, + "num_tokens": 56873862.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9818915128707886, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01658123169064088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05392301756254081, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1120.4375, + "completions/mean_terminated_length": 1120.4375, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.315328832208052, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.891825043536752, + "kl": 0.0077056884765625, + "learning_rate": 8.790451615434613e-07, + "loss": 0.0159, + "num_tokens": 56907549.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.991524875164032, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02164391066093916, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08522912120239842, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1350.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1067.6875, + "completions/mean_terminated_length": 1067.6875, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.31557889472368095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.122812557515336, + "kl": 0.010772705078125, + "learning_rate": 8.787770817714811e-07, + "loss": -0.0391, + "num_tokens": 56960728.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.844917893409729, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020843703110103338, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08367613097389726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1224.875, + "completions/mean_terminated_length": 1185.571533203125, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.3158289572393098, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5831401199478474, + "kl": 0.00958251953125, + "learning_rate": 8.78508751482294e-07, + "loss": -0.042, + "num_tokens": 57006870.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6283839344978333, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03521893504008922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06176924149742315, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1278.625, + "completions/mean_terminated_length": 1247.0, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.31607901975493874, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7041815698074907, + "kl": 0.0100555419921875, + "learning_rate": 8.782401708803589e-07, + "loss": -0.0446, + "num_tokens": 57050584.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.986139178276062, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02056396629827638, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03763103982445374, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1073.0625, + "completions/mean_terminated_length": 1073.0625, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.31632908227056766, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.196250800775642, + "kl": 0.0122222900390625, + "learning_rate": 8.77971340170325e-07, + "loss": -0.0593, + "num_tokens": 57084169.0, + "reward": 0.0, + "reward_std": 0.871902585029602, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.31951278614592066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.32864482632353104, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1295.1875, + "completions/mean_terminated_length": 1090.375, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.31657914478619653, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9431771204126065, + "kl": 0.00814056396484375, + "learning_rate": 8.777022595570328e-07, + "loss": -0.0278, + "num_tokens": 57135404.0, + "reward": 0.0, + "reward_std": 0.8450451493263245, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02775475871162922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08853593011555322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1373.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1053.0625, + "completions/mean_terminated_length": 1053.0625, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.31682920730182546, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9977729155086448, + "kl": 0.015167236328125, + "learning_rate": 8.774329292455125e-07, + "loss": 0.0111, + "num_tokens": 57176037.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7042138576507568, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005999162779551228, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05770274385363145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.166888740937943, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 1051.5, + "completions/mean_terminated_length": 1021.6000366210938, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.3170792698174544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4426696700195, + "kl": 0.0111236572265625, + "learning_rate": 8.771633494409848e-07, + "loss": -0.075, + "num_tokens": 57218541.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9556465148925781, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07798515410496941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09275173999192877, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1246.5625, + "completions/mean_terminated_length": 1229.666748046875, + "completions/min_length": 1030.0, + "completions/min_terminated_length": 1030.0, + "epoch": 0.31732933233308325, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.171582561377082, + "kl": 0.009368896484375, + "learning_rate": 8.768935203488608e-07, + "loss": -0.0015, + "num_tokens": 57255230.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6499660015106201, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025587079895831887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04035611205083822, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1288.625, + "completions/mean_terminated_length": 1239.84619140625, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.31757939484871217, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7453828854869706, + "kl": 0.0101470947265625, + "learning_rate": 8.76623442174741e-07, + "loss": 0.0063, + "num_tokens": 57303344.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9638252258300781, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03274625795786397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02819710892061995, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195013, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1120.0, + "completions/max_terminated_length": 1120.0, + "completions/mean_length": 816.5, + "completions/mean_terminated_length": 816.5, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.3178294573643411, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3114336144301006, + "kl": 0.010162353515625, + "learning_rate": 8.763531151244163e-07, + "loss": -0.0188, + "num_tokens": 57339464.0, + "reward": 0.0, + "reward_std": 0.8391399383544922, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0015718177903394324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051635653270523794, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1310.75, + "completions/mean_terminated_length": 1163.5555419921875, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.31807951987997, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.510449023233515, + "kl": 0.0091094970703125, + "learning_rate": 8.760825394038666e-07, + "loss": -0.0609, + "num_tokens": 57392876.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6867996454238892, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034995793980720606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07628627523174443, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1145.0625, + "completions/mean_terminated_length": 1094.357177734375, + "completions/min_length": 297.0, + "completions/min_terminated_length": 297.0, + "epoch": 0.3183295823955989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.170496587094906, + "kl": 0.01519775390625, + "learning_rate": 8.75811715219262e-07, + "loss": -0.0607, + "num_tokens": 57439965.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9830193519592285, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.037439631249994144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18916657051937347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1355.1875, + "completions/mean_terminated_length": 1242.5555419921875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.3185796449112278, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.689143389664131, + "kl": 0.0099945068359375, + "learning_rate": 8.755406427769612e-07, + "loss": 0.0255, + "num_tokens": 57488192.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.47554847598075867, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03465430491680672, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1860026831635244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17888543819998318, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1285.9375, + "completions/mean_terminated_length": 1214.5833740234375, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.31882970742685673, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.470082223679357, + "kl": 0.0135498046875, + "learning_rate": 8.752693222835125e-07, + "loss": -0.0075, + "num_tokens": 57537527.0, + "reward": 0.0, + "reward_std": 0.6157888770103455, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08242373409593384, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16004420014947696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1467.125, + "completions/mean_terminated_length": 1368.5, + "completions/min_length": 1230.0, + "completions/min_terminated_length": 1230.0, + "epoch": 0.3190797699424856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.323794321321336, + "kl": 0.0087432861328125, + "learning_rate": 8.749977539456531e-07, + "loss": -0.0026, + "num_tokens": 57603521.0, + "reward": 0.0, + "reward_std": 0.8554513454437256, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06488804904516149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09964475453635982, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1241.75, + "completions/mean_terminated_length": 1204.857177734375, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.3193298324581145, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.374271171788913, + "kl": 0.0077667236328125, + "learning_rate": 8.74725937970309e-07, + "loss": -0.0212, + "num_tokens": 57645189.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.042226791381836, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005831554300973669, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05089044521871031, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0644061188719531, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1194.4375, + "completions/mean_terminated_length": 1123.923095703125, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.31957989497374345, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8270831489959622, + "kl": 0.00826263427734375, + "learning_rate": 8.744538745645949e-07, + "loss": -0.0046, + "num_tokens": 57696180.0, + "reward": 0.0, + "reward_std": 0.47442808747291565, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02960400384506991, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04647808055174536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1005.9375, + "completions/mean_terminated_length": 1005.9375, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.3198299574893723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.832544351114772, + "kl": 0.014434814453125, + "learning_rate": 8.741815639358144e-07, + "loss": -0.043, + "num_tokens": 57728587.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0624346733093262, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09573953544335784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11329788940630521, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 1242.6875, + "completions/mean_terminated_length": 985.375, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.32008002000500124, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4104590563641453, + "kl": 0.00717926025390625, + "learning_rate": 8.739090062914586e-07, + "loss": -0.0242, + "num_tokens": 57772374.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8154550790786743, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1060110968909792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034094283595036086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1316.6875, + "completions/mean_terminated_length": 1255.5833740234375, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.32033008252063017, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.05802467980822, + "kl": 0.012115478515625, + "learning_rate": 8.736362018392079e-07, + "loss": -0.0109, + "num_tokens": 57828225.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9612133502960205, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021291353866776108, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1030604863863321, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1165.9375, + "completions/mean_terminated_length": 1165.9375, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.3205801450362591, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0274695161255107, + "kl": 0.01068115234375, + "learning_rate": 8.733631507869296e-07, + "loss": -0.0201, + "num_tokens": 57886016.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9510307312011719, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027036114210966786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041565726327144496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1236.875, + "completions/mean_terminated_length": 1199.2857666015625, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.32083020755188796, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8126942726992645, + "kl": 0.0145111083984375, + "learning_rate": 8.730898533426802e-07, + "loss": 0.0032, + "num_tokens": 57936958.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9970427751541138, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021822032197048234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05053696299838909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1105.125, + "completions/mean_terminated_length": 868.2000122070312, + "completions/min_length": 520.0, + "completions/min_terminated_length": 520.0, + "epoch": 0.3210802700675169, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1829413322825357, + "kl": 0.0124359130859375, + "learning_rate": 8.72816309714703e-07, + "loss": -0.0402, + "num_tokens": 57983848.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.736251175403595, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03672304084829829, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08296308864277566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1063.6875, + "completions/mean_terminated_length": 1063.6875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.3213303325831458, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3237955871784437, + "kl": 0.01300048828125, + "learning_rate": 8.725425201114291e-07, + "loss": -0.0002, + "num_tokens": 58029275.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7892739772796631, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06421789805729246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4613335692754815, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1251.125, + "completions/mean_terminated_length": 1215.571533203125, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.3215803950987747, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.829695063670309, + "kl": 0.01081085205078125, + "learning_rate": 8.72268484741477e-07, + "loss": -0.0315, + "num_tokens": 58072957.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.027342677116394, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2863012102353866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27539601195998964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1131.375, + "completions/mean_terminated_length": 1106.800048828125, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.3218304576144036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.963179312019697, + "kl": 0.0105133056640625, + "learning_rate": 8.719942038136527e-07, + "loss": -0.0208, + "num_tokens": 58121779.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9296264052391052, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10988579375495197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06412072545415219, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1355.625, + "completions/mean_terminated_length": 1290.0, + "completions/min_length": 1126.0, + "completions/min_terminated_length": 1126.0, + "epoch": 0.3220805201300325, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.225335755984575, + "kl": 0.00720977783203125, + "learning_rate": 8.717196775369491e-07, + "loss": 0.0039, + "num_tokens": 58172341.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5091097950935364, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03985906498362934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04250443408028256, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1233.5, + "completions/mean_terminated_length": 1215.7333984375, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.3223305826456614, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9767946725012053, + "kl": 0.0113067626953125, + "learning_rate": 8.714449061205459e-07, + "loss": -0.0451, + "num_tokens": 58217741.0, + "reward": 0.0, + "reward_std": 0.8488910794258118, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1691116752495594, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1338340659001142, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1025.5625, + "completions/mean_terminated_length": 1025.5625, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.3225806451612903, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.716910906922037, + "kl": 0.0131072998046875, + "learning_rate": 8.711698897738101e-07, + "loss": -0.0234, + "num_tokens": 58255198.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8864603638648987, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002433242195444104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05772833807359084, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1234.375, + "completions/mean_terminated_length": 1234.375, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.32283070767691924, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.21900758642703, + "kl": 0.0119476318359375, + "learning_rate": 8.708946287062949e-07, + "loss": 0.0102, + "num_tokens": 58306748.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9297760725021362, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029641618534406645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052899157254013465, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1094.1875, + "completions/mean_terminated_length": 1094.1875, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.32308077019254816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0501004885873697, + "kl": 0.0054168701171875, + "learning_rate": 8.706191231277401e-07, + "loss": 0.0, + "num_tokens": 58355215.0, + "reward": 0.0, + "reward_std": 0.7177466154098511, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07456302944756853, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03780616243484435, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.141878925953186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1239.25, + "completions/mean_terminated_length": 1120.727294921875, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.32333083270817703, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2028409221984657, + "kl": 0.0106201171875, + "learning_rate": 8.703433732480719e-07, + "loss": 0.001, + "num_tokens": 58413187.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.664710283279419, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08001468205933077, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08039100043674105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1185.4375, + "completions/mean_terminated_length": 1164.4666748046875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.32358089522380595, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3312254409537445, + "kl": 0.0141754150390625, + "learning_rate": 8.700673792774021e-07, + "loss": -0.0151, + "num_tokens": 58456658.0, + "reward": 0.0, + "reward_std": 0.6952779293060303, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06099956182401399, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043421071904686984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1123.8125, + "completions/mean_terminated_length": 1123.8125, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.3238309577394349, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.454631229779196, + "kl": 0.0082855224609375, + "learning_rate": 8.697911414260294e-07, + "loss": 0.0167, + "num_tokens": 58489991.0, + "reward": 0.0, + "reward_std": 0.9183434844017029, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0377825618631244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06684886744788089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1036.875, + "completions/mean_terminated_length": 1006.0000610351562, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.32408102025506375, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7695955425454293, + "kl": 0.0126495361328125, + "learning_rate": 8.695146599044377e-07, + "loss": 0.0222, + "num_tokens": 58518789.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0362094640731812, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005835802696164611, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047194019604548186, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1419.25, + "completions/mean_terminated_length": 1356.4444580078125, + "completions/min_length": 1267.0, + "completions/min_terminated_length": 1267.0, + "epoch": 0.32433108277069267, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0304530839946433, + "kl": 0.00732421875, + "learning_rate": 8.692379349232966e-07, + "loss": -0.0008, + "num_tokens": 58579409.0, + "reward": 0.0, + "reward_std": 0.8587586879730225, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12378269952667548, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10473620864677895, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1215.75, + "completions/mean_terminated_length": 1215.75, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.3245811452863216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.17969707686928, + "kl": 0.011962890625, + "learning_rate": 8.689609666934616e-07, + "loss": 0.0134, + "num_tokens": 58623589.0, + "reward": 0.0, + "reward_std": 0.827307939529419, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12487854879249675, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11898763697282329, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1383.3125, + "completions/mean_terminated_length": 1292.5555419921875, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.32483120780195046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.827357010276229, + "kl": 0.01019287109375, + "learning_rate": 8.686837554259731e-07, + "loss": 0.0427, + "num_tokens": 58676010.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.068483591079712, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06305496963998175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1294393750447533, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1304.1875, + "completions/mean_terminated_length": 1215.181884765625, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.3250812703175794, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.751131794108329, + "kl": 0.0091552734375, + "learning_rate": 8.684063013320568e-07, + "loss": -0.0222, + "num_tokens": 58728813.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7440013289451599, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05189579913857935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16502298952805283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1411.9375, + "completions/mean_terminated_length": 1298.71435546875, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.3253313328332083, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0178117399712554, + "kl": 0.0103912353515625, + "learning_rate": 8.681286046231238e-07, + "loss": 0.0095, + "num_tokens": 58779884.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0500943660736084, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038727121721708446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11040532991654721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1052.4375, + "completions/mean_terminated_length": 1052.4375, + "completions/min_length": 584.0, + "completions/min_terminated_length": 584.0, + "epoch": 0.32558139534883723, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8697111932442474, + "kl": 0.009002685546875, + "learning_rate": 8.678506655107695e-07, + "loss": 0.0026, + "num_tokens": 58831395.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0348291397094727, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031794231751404896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09934228237165504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1294.25, + "completions/mean_terminated_length": 1134.2222900390625, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.3258314578644661, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0497839669778277, + "kl": 0.0101318359375, + "learning_rate": 8.675724842067742e-07, + "loss": -0.0107, + "num_tokens": 58890551.0, + "reward": 0.0, + "reward_std": 1.0540978908538818, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04205257474197182, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05940493619184038, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1195.0, + "completions/max_terminated_length": 1195.0, + "completions/mean_length": 991.875, + "completions/mean_terminated_length": 991.875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.326081520380095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3656629149956254, + "kl": 0.010406494140625, + "learning_rate": 8.672940609231031e-07, + "loss": 0.0072, + "num_tokens": 58918909.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.050639271736145, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 1264.0, + "completions/mean_terminated_length": 1080.4444580078125, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.32633158289572395, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2153237699637693, + "kl": 0.00638580322265625, + "learning_rate": 8.670153958719051e-07, + "loss": -0.0166, + "num_tokens": 58981685.0, + "reward": 0.0, + "reward_std": 0.6412097215652466, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003978189760398926, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09425885107036286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1193.0, + "completions/max_terminated_length": 1193.0, + "completions/mean_length": 976.1875, + "completions/mean_terminated_length": 976.1875, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.3265816454113528, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8995587310414925, + "kl": 0.007790565490722656, + "learning_rate": 8.667364892655141e-07, + "loss": 0.0131, + "num_tokens": 59014520.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8633036613464355, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08686991884596398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09164814444064986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 971.125, + "completions/mean_terminated_length": 971.125, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.32683170792698174, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.778214651393554, + "kl": 0.0143585205078125, + "learning_rate": 8.664573413164475e-07, + "loss": 0.0079, + "num_tokens": 59046098.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.9731907248497009, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -7.398218802246587e-05, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039405688026983895, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1144.375, + "completions/mean_terminated_length": 1025.8333740234375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.32708177044261066, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2812436771381273, + "kl": 0.012847900390625, + "learning_rate": 8.661779522374067e-07, + "loss": 0.0149, + "num_tokens": 59085216.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0200004577636719, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02577242171220634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04167180640829693, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.048495895206211566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1291.3125, + "completions/mean_terminated_length": 1243.1539306640625, + "completions/min_length": 379.0, + "completions/min_terminated_length": 379.0, + "epoch": 0.32733183295823953, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.896192975480248, + "kl": 0.011993408203125, + "learning_rate": 8.658983222412771e-07, + "loss": -0.0877, + "num_tokens": 59145773.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6140938401222229, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06452781641873116, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07931563617500584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 903.0, + "completions/mean_terminated_length": 863.2000732421875, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.32758189547386846, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4596768020992164, + "kl": 0.00830841064453125, + "learning_rate": 8.656184515411272e-07, + "loss": 0.0097, + "num_tokens": 59171733.0, + "reward": 0.0, + "reward_std": 0.6554725170135498, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07340107145017077, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08732069403072977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452246, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1385.125, + "completions/mean_terminated_length": 1346.8333740234375, + "completions/min_length": 1231.0, + "completions/min_terminated_length": 1231.0, + "epoch": 0.3278319579894974, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.946365878697217, + "kl": 0.010406494140625, + "learning_rate": 8.653383403502092e-07, + "loss": 0.0013, + "num_tokens": 59214735.0, + "reward": 0.0, + "reward_std": 0.9996354579925537, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0001278220005242912, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14161923391501796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1147.625, + "completions/mean_terminated_length": 873.5555419921875, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.3280820205051263, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2444659116675143, + "kl": 0.0088348388671875, + "learning_rate": 8.650579888819586e-07, + "loss": -0.0303, + "num_tokens": 59257865.0, + "reward": -1.30385160446167e-08, + "reward_std": 1.0559349060058594, + "rewards/wordcountpos_reward_GEOBench/mean": -1.30385160446167e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07994871308895182, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.182479301576449, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1032.9375, + "completions/mean_terminated_length": 1032.9375, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.32833208302075517, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4235774514114814, + "kl": 0.012359619140625, + "learning_rate": 8.64777397349994e-07, + "loss": -0.0174, + "num_tokens": 59289184.0, + "reward": 0.0, + "reward_std": 0.3753572106361389, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024560296937456576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07400211703391478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333333, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 1003.3125, + "completions/mean_terminated_length": 1003.3125, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.3285821455363841, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2152086348242555, + "kl": 0.00897979736328125, + "learning_rate": 8.644965659681166e-07, + "loss": -0.0193, + "num_tokens": 59324853.0, + "reward": 0.0, + "reward_std": 0.582179069519043, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09180732397097667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14775044393434983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1295.125, + "completions/mean_terminated_length": 1281.4666748046875, + "completions/min_length": 1152.0, + "completions/min_terminated_length": 1152.0, + "epoch": 0.328832208052013, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2839665251502947, + "kl": 0.01012420654296875, + "learning_rate": 8.642154949503106e-07, + "loss": 0.007, + "num_tokens": 59385735.0, + "reward": 0.0, + "reward_std": 0.55080646276474, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10006921009672333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20853819948745211, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1203.125, + "completions/mean_terminated_length": 1160.71435546875, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.3290822705676419, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3641581472382702, + "kl": 0.013641357421875, + "learning_rate": 8.639341845107432e-07, + "loss": -0.0458, + "num_tokens": 59428505.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0195761919021606, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029111847315880696, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07889528290109964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 950.75, + "completions/mean_terminated_length": 914.1333618164062, + "completions/min_length": 629.0, + "completions/min_terminated_length": 629.0, + "epoch": 0.3293323330832708, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.127717975112397, + "kl": 0.011322021484375, + "learning_rate": 8.636526348637629e-07, + "loss": -0.0363, + "num_tokens": 59459117.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9266012907028198, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10013744890024569, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09338993751659994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1127.4375, + "completions/mean_terminated_length": 1127.4375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.32958239559889974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2905556295072635, + "kl": 0.01019287109375, + "learning_rate": 8.633708462239017e-07, + "loss": -0.0543, + "num_tokens": 59504548.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8912458419799805, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15526317669816847, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1501501555044339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1112.5, + "completions/mean_terminated_length": 1057.1429443359375, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.32983245811452866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8873532730001545, + "kl": 0.00714874267578125, + "learning_rate": 8.63088818805873e-07, + "loss": 0.0283, + "num_tokens": 59536972.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0355312824249268, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07095355979903735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.29673283034439457, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1021.5625, + "completions/mean_terminated_length": 1021.5625, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.3300825206301575, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1241659239781163, + "kl": 0.0134429931640625, + "learning_rate": 8.628065528245722e-07, + "loss": 0.01, + "num_tokens": 59568277.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9228854179382324, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023986186797536113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09161811165214356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298547, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1324.375, + "completions/mean_terminated_length": 1148.75, + "completions/min_length": 514.0, + "completions/min_terminated_length": 514.0, + "epoch": 0.33033258314578645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.098233183725489, + "kl": 0.00891876220703125, + "learning_rate": 8.625240484950769e-07, + "loss": -0.0693, + "num_tokens": 59626179.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6578680276870728, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23389351860519486, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21781160524670734, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1291.0, + "completions/max_terminated_length": 1291.0, + "completions/mean_length": 969.5, + "completions/mean_terminated_length": 969.5, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.3305826456614154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.212966978004393, + "kl": 0.011474609375, + "learning_rate": 8.622413060326458e-07, + "loss": -0.0139, + "num_tokens": 59660355.0, + "reward": 0.0, + "reward_std": 0.7967063188552856, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06622852403965855, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11095527089886047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04554200340426488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1221.8125, + "completions/mean_terminated_length": 1129.0833740234375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.33083270817704424, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.061693476864947, + "kl": 0.01190185546875, + "learning_rate": 8.619583256527191e-07, + "loss": -0.0299, + "num_tokens": 59715448.0, + "reward": 0.0, + "reward_std": 0.7548435926437378, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05146548749911151, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.203573472696939, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 874.125, + "completions/mean_terminated_length": 874.125, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.33108277069267317, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.668450016780099, + "kl": 0.0106048583984375, + "learning_rate": 8.616751075709188e-07, + "loss": 0.0114, + "num_tokens": 59755666.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.962219774723053, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04588550329111716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08837001915330017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1016.125, + "completions/mean_terminated_length": 1016.125, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.3313328332083021, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.054203683276626, + "kl": 0.01004791259765625, + "learning_rate": 8.613916520030474e-07, + "loss": -0.0322, + "num_tokens": 59790868.0, + "reward": 0.0, + "reward_std": 0.647649884223938, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039202608472940007, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08536247009323587, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1015.5625, + "completions/mean_terminated_length": 1015.5625, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.33158289572393096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.961650935962609, + "kl": 0.011474609375, + "learning_rate": 8.611079591650887e-07, + "loss": -0.0274, + "num_tokens": 59836229.0, + "reward": 0.0, + "reward_std": 0.6741411685943604, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10881780066880664, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1291325000963345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1150.9375, + "completions/mean_terminated_length": 1101.071533203125, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.3318329582395599, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.342512371627052, + "kl": 0.015106201171875, + "learning_rate": 8.608240292732074e-07, + "loss": -0.0492, + "num_tokens": 59886500.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9688470363616943, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1958619408986246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07304958608033985, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1244.375, + "completions/mean_terminated_length": 1185.3846435546875, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.3320830207551888, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.380613141044302, + "kl": 0.009296417236328125, + "learning_rate": 8.605398625437482e-07, + "loss": -0.0033, + "num_tokens": 59928546.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7166042327880859, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12566807018921752, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16519714928701518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1196.9375, + "completions/mean_terminated_length": 1153.6429443359375, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.33233308327081773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.249557348144354, + "kl": 0.0103912353515625, + "learning_rate": 8.602554591932372e-07, + "loss": -0.0296, + "num_tokens": 59971721.0, + "reward": 0.0, + "reward_std": 0.9756282567977905, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06067435692207683, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08740896934351575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1232.0, + "completions/mean_terminated_length": 1110.181884765625, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.3325831457864466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.743717414822609, + "kl": 0.0103759765625, + "learning_rate": 8.5997081943838e-07, + "loss": 0.0146, + "num_tokens": 60015297.0, + "reward": 9.313225746154785e-09, + "reward_std": 1.0512570142745972, + "rewards/wordcountpos_reward_GEOBench/mean": 9.313225746154785e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.100833340155947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08360695292769169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0807373427759331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1200.6875, + "completions/mean_terminated_length": 1064.6363525390625, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.3328332083020755, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6212775334799683, + "kl": 0.0107574462890625, + "learning_rate": 8.596859434960626e-07, + "loss": 0.0263, + "num_tokens": 60061012.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7464958429336548, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004406856314907374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10275029730595325, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1027.375, + "completions/mean_terminated_length": 995.86669921875, + "completions/min_length": 496.0, + "completions/min_terminated_length": 496.0, + "epoch": 0.33308327081770445, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2721322665114396, + "kl": 0.0089111328125, + "learning_rate": 8.594008315833511e-07, + "loss": 0.0109, + "num_tokens": 60092226.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8554216027259827, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020089139203857555, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09493041734230062, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1284.75, + "completions/mean_terminated_length": 1155.5999755859375, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.3333333333333333, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.56994655551278, + "kl": 0.00732421875, + "learning_rate": 8.591154839174914e-07, + "loss": 0.0124, + "num_tokens": 60147702.0, + "reward": 0.0, + "reward_std": 0.8782030344009399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0309375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06796998927567932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1081.125, + "completions/mean_terminated_length": 1021.2857666015625, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.33358339584896224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0890118613597157, + "kl": 0.0116729736328125, + "learning_rate": 8.588299007159087e-07, + "loss": 0.0042, + "num_tokens": 60200120.0, + "reward": 0.0, + "reward_std": 0.9033869504928589, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04871082150568705, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09305958977057244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1346.25, + "completions/mean_terminated_length": 1254.0, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.33383345836459116, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.459381190526839, + "kl": 0.007114410400390625, + "learning_rate": 8.585440821962081e-07, + "loss": -0.0058, + "num_tokens": 60246860.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.966549277305603, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040324763028903365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05135349833406559, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1235.0625, + "completions/mean_terminated_length": 1173.923095703125, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.33408352088022003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0438667115293514, + "kl": 0.012298583984375, + "learning_rate": 8.582580285761737e-07, + "loss": -0.0024, + "num_tokens": 60300181.0, + "reward": 0.0, + "reward_std": 0.37647104263305664, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12783475396111021, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15956175962517408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1276.25, + "completions/mean_terminated_length": 1244.2857666015625, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.33433358339584895, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.188599484590302, + "kl": 0.0147857666015625, + "learning_rate": 8.57971740073769e-07, + "loss": 0.0503, + "num_tokens": 60340577.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8062348365783691, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0209604042965036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13882046057127728, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1055.3125, + "completions/mean_terminated_length": 1055.3125, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.3345836459114779, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.82174588326954, + "kl": 0.0142974853515625, + "learning_rate": 8.57685216907136e-07, + "loss": -0.0004, + "num_tokens": 60386582.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8394474983215332, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0037125843352705263, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1378058497244492, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1088662107903635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1294.75, + "completions/mean_terminated_length": 1265.4285888671875, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.3348337084271068, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6453841521044152, + "kl": 0.009063720703125, + "learning_rate": 8.573984592945962e-07, + "loss": -0.0265, + "num_tokens": 60444018.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9839876294136047, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04789276812688702, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07623780390128346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05426273532033237, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1184.9375, + "completions/mean_terminated_length": 1184.9375, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.33508377094273567, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4547439367125548, + "kl": 0.007080078125, + "learning_rate": 8.571114674546493e-07, + "loss": -0.0177, + "num_tokens": 60490889.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9748765826225281, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.30576001444881884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.35015824448800714, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1247.75, + "completions/mean_terminated_length": 1163.666748046875, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.3353338334583646, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.646810737457037, + "kl": 0.0095672607421875, + "learning_rate": 8.568242416059732e-07, + "loss": 0.0131, + "num_tokens": 60542573.0, + "reward": 0.0, + "reward_std": 0.9878085851669312, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021015609658423532, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055016153035529, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1159.75, + "completions/mean_terminated_length": 1137.0667724609375, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.3355838959739935, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.080648677907141, + "kl": 0.0117034912109375, + "learning_rate": 8.565367819674247e-07, + "loss": -0.0521, + "num_tokens": 60595057.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0400519371032715, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011104837318268935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1250985391341496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1110.0, + "completions/mean_length": 923.3125, + "completions/mean_terminated_length": 661.1818237304688, + "completions/min_length": 458.0, + "completions/min_terminated_length": 458.0, + "epoch": 0.3358339584896224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3676834787358025, + "kl": 0.011566162109375, + "learning_rate": 8.562490887580388e-07, + "loss": -0.0721, + "num_tokens": 60634094.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0431368350982666, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00928585558513977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10263302364450425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1469.625, + "completions/mean_terminated_length": 1257.0, + "completions/min_length": 1189.0, + "completions/min_terminated_length": 1189.0, + "epoch": 0.3360840210052513, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.397419988331558, + "kl": 0.0107421875, + "learning_rate": 8.559611621970277e-07, + "loss": 0.0099, + "num_tokens": 60698992.0, + "reward": 0.0, + "reward_std": 0.9898213148117065, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01338145674125339, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14065430098879497, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1308.875, + "completions/mean_terminated_length": 1222.0, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.33633408352088023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3546189927055967, + "kl": 0.00777435302734375, + "learning_rate": 8.556730025037819e-07, + "loss": -0.0137, + "num_tokens": 60743222.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0352001190185547, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04553132901341624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.143474177130227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1233.75, + "completions/mean_terminated_length": 1172.3077392578125, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.3365841460365091, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4134914131828116, + "kl": 0.0128936767578125, + "learning_rate": 8.553846098978696e-07, + "loss": 0.0244, + "num_tokens": 60798578.0, + "reward": 0.0, + "reward_std": 0.7198019623756409, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09313651294656823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17970313211527317, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1087.75, + "completions/mean_terminated_length": 992.6154174804688, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.336834208552138, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.654813573898097, + "kl": 0.013671875, + "learning_rate": 8.550959845990366e-07, + "loss": -0.0007, + "num_tokens": 60844278.0, + "reward": 0.0, + "reward_std": 0.9253852963447571, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1166.1875, + "completions/mean_terminated_length": 1166.1875, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.33708427106776695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.380595556986614, + "kl": 0.018402099609375, + "learning_rate": 8.548071268272056e-07, + "loss": -0.021, + "num_tokens": 60899225.0, + "reward": 0.0, + "reward_std": 0.8896897435188293, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012030905622866564, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07864757214268407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1048.5625, + "completions/mean_terminated_length": 1048.5625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.3373343335833959, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.962606021314234, + "kl": 0.014190673828125, + "learning_rate": 8.545180368024764e-07, + "loss": 0.0385, + "num_tokens": 60949786.0, + "reward": 0.0, + "reward_std": 0.37601929903030396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02367476730801943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09414883263226145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1061.0, + "completions/max_terminated_length": 1061.0, + "completions/mean_length": 957.4375, + "completions/mean_terminated_length": 957.4375, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.33758439609902474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.475694543468588, + "kl": 0.009246826171875, + "learning_rate": 8.542287147451263e-07, + "loss": -0.0255, + "num_tokens": 60993609.0, + "reward": 0.0, + "reward_std": 0.9992542266845703, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01661313421899376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04382408713239269, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05708992257184504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1141.0, + "completions/mean_length": 1183.0625, + "completions/mean_terminated_length": 992.9000244140625, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.33783445861465367, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3383892463492084, + "kl": 0.010894775390625, + "learning_rate": 8.539391608756089e-07, + "loss": -0.0459, + "num_tokens": 61043946.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9762459397315979, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09938818660795495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11483454474211907, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1069.0, + "completions/max_terminated_length": 1069.0, + "completions/mean_length": 965.9375, + "completions/mean_terminated_length": 965.9375, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.3380845211302826, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.521491577923054, + "kl": 0.007904052734375, + "learning_rate": 8.536493754145547e-07, + "loss": 0.0108, + "num_tokens": 61085001.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0652832984924316, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03611153840190022, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13625332133037657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1112.0625, + "completions/mean_terminated_length": 1112.0625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.33833458364591146, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5267956374900766, + "kl": 0.0074615478515625, + "learning_rate": 8.533593585827708e-07, + "loss": 0.0065, + "num_tokens": 61131666.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7456632852554321, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025236811192446192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14026545686991082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16510378329783743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1300.9375, + "completions/mean_terminated_length": 1272.5, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.3385846461615404, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.293781568964852, + "kl": 0.0063018798828125, + "learning_rate": 8.5306911060124e-07, + "loss": 0.0114, + "num_tokens": 61178393.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0302597284317017, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04584104976452451, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09278000742218323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1269.375, + "completions/mean_terminated_length": 1038.75, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.3388347086771693, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6085682537997226, + "kl": 0.00971221923828125, + "learning_rate": 8.527786316911219e-07, + "loss": 0.0148, + "num_tokens": 61223327.0, + "reward": 0.0, + "reward_std": 0.6621127128601074, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0867217691683625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14215076069890328, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18678567634829202, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1172.375, + "completions/mean_terminated_length": 1150.533447265625, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.3390847711927982, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9698487687386206, + "kl": 0.011077880859375, + "learning_rate": 8.524879220737515e-07, + "loss": -0.0327, + "num_tokens": 61252509.0, + "reward": 0.0, + "reward_std": 0.38859373331069946, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08591625533593984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09335579320704329, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 958.0, + "completions/max_terminated_length": 958.0, + "completions/mean_length": 758.25, + "completions/mean_terminated_length": 758.25, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.3393348337084271, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6187311559101376, + "kl": 0.00547027587890625, + "learning_rate": 8.521969819706402e-07, + "loss": -0.016, + "num_tokens": 61278921.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0178874731063843, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16087242385300485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27222199829605254, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1106.0, + "completions/mean_terminated_length": 1106.0, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.339584896224056, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1045046808146464, + "kl": 0.0117034912109375, + "learning_rate": 8.519058116034745e-07, + "loss": -0.0336, + "num_tokens": 61312129.0, + "reward": 0.0, + "reward_std": 0.7545187473297119, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034919237930290045, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06580297094920479, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14395215254459456, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1050.375, + "completions/mean_terminated_length": 1020.4000244140625, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.33983495873968494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6011537872508814, + "kl": 0.012969970703125, + "learning_rate": 8.516144111941164e-07, + "loss": 0.0019, + "num_tokens": 61342807.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6558761596679688, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019952868977085824, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07664121383848792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1179.8125, + "completions/mean_terminated_length": 1105.923095703125, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.3400850212553138, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3011925333410304, + "kl": 0.011260986328125, + "learning_rate": 8.513227809646037e-07, + "loss": -0.0271, + "num_tokens": 61387276.0, + "reward": 0.0, + "reward_std": 0.6572839021682739, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027664036334155373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.038183601563440535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1348.625, + "completions/mean_terminated_length": 1257.800048828125, + "completions/min_length": 1084.0, + "completions/min_terminated_length": 1084.0, + "epoch": 0.34033508377094274, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.58230483688851, + "kl": 0.00982666015625, + "learning_rate": 8.510309211371486e-07, + "loss": -0.0169, + "num_tokens": 61443310.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.9199034571647644, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009284682993577221, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026926268047451702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1165.0, + "completions/mean_length": 989.5625, + "completions/mean_terminated_length": 819.4166870117188, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.34058514628657166, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.200073558251365, + "kl": 0.0098419189453125, + "learning_rate": 8.507388319341387e-07, + "loss": -0.0524, + "num_tokens": 61494439.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6894327402114868, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3579111437600314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.34146601302447055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1063.375, + "completions/mean_terminated_length": 1001.0000610351562, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.34083520880220053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7632110224386675, + "kl": 0.0150299072265625, + "learning_rate": 8.504465135781364e-07, + "loss": -0.0165, + "num_tokens": 61541565.0, + "reward": 0.0, + "reward_std": 0.8718996644020081, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.033268531275718806, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11382599723639869, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 942.25, + "completions/mean_terminated_length": 942.25, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.34108527131782945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9247202184382024, + "kl": 0.011932373046875, + "learning_rate": 8.501539662918782e-07, + "loss": -0.0597, + "num_tokens": 61585297.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4607587456703186, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005266265612154793, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31458021251874213, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1319.875, + "completions/mean_terminated_length": 1259.8333740234375, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.3413353338334584, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0331561595623375, + "kl": 0.0121612548828125, + "learning_rate": 8.498611902982759e-07, + "loss": -0.0023, + "num_tokens": 61636719.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7729552984237671, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03150142836348327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1434353131726578, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1254.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 995.875, + "completions/mean_terminated_length": 995.875, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.34158539634908724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5506032642139345, + "kl": 0.01116943359375, + "learning_rate": 8.495681858204147e-07, + "loss": -0.0397, + "num_tokens": 61664797.0, + "reward": 0.0, + "reward_std": 0.7529758214950562, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08738336421882112, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06822073594335479, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1332.875, + "completions/mean_terminated_length": 1232.5999755859375, + "completions/min_length": 1153.0, + "completions/min_terminated_length": 1153.0, + "epoch": 0.34183545886471617, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6194716282699018, + "kl": 0.0110931396484375, + "learning_rate": 8.492749530815544e-07, + "loss": -0.006, + "num_tokens": 61722723.0, + "reward": 0.0, + "reward_std": 0.8642160892486572, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0410719428532461, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05685522197740933, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1059.25, + "completions/mean_terminated_length": 996.2857666015625, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.3420855213803451, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.187716499235375, + "kl": 0.01177978515625, + "learning_rate": 8.489814923051287e-07, + "loss": -0.0441, + "num_tokens": 61763295.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6600533723831177, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1369717772924408, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12999800004971174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1334.0, + "completions/max_terminated_length": 1334.0, + "completions/mean_length": 1136.0, + "completions/mean_terminated_length": 1136.0, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.342335583895974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0594838041630963, + "kl": 0.0108642578125, + "learning_rate": 8.486878037147446e-07, + "loss": 0.0035, + "num_tokens": 61801663.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0607013702392578, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026343778461499608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0785356792757792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1047.6875, + "completions/mean_terminated_length": 1047.6875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.3425856464116029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8958394912234375, + "kl": 0.008453369140625, + "learning_rate": 8.483938875341836e-07, + "loss": -0.0101, + "num_tokens": 61838370.0, + "reward": 0.0, + "reward_std": 0.5845850706100464, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006463684368173597, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11705732961140891, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1189459883650901, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1272.9375, + "completions/mean_terminated_length": 1096.3333740234375, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.3428357089272318, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.280611427942962, + "kl": 0.0109710693359375, + "learning_rate": 8.480997439873994e-07, + "loss": -0.0281, + "num_tokens": 61897025.0, + "reward": 0.0, + "reward_std": 0.6607404947280884, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01621828804540124, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09213731984068713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18811934746029949, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1216.875, + "completions/mean_terminated_length": 1176.4285888671875, + "completions/min_length": 1080.0, + "completions/min_terminated_length": 1080.0, + "epoch": 0.34308577144286073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.662395230035457, + "kl": 0.012115478515625, + "learning_rate": 8.478053732985204e-07, + "loss": -0.0332, + "num_tokens": 61946503.0, + "reward": 0.0, + "reward_std": 1.0370500087738037, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01577260785603514, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09119334975550177, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1324.0, + "completions/max_terminated_length": 1324.0, + "completions/mean_length": 1111.75, + "completions/mean_terminated_length": 1111.75, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.3433358339584896, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.210935536129047, + "kl": 0.0111846923828125, + "learning_rate": 8.475107756918465e-07, + "loss": -0.0011, + "num_tokens": 61992915.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8652464747428894, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10165749169708212, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10078845128470992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1140.5, + "completions/mean_terminated_length": 1140.5, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.3435858964741185, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.854224479905153, + "kl": 0.010711669921875, + "learning_rate": 8.472159513918519e-07, + "loss": -0.0001, + "num_tokens": 62037275.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8921484351158142, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056195031835015195, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08362610821824608, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147857, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1277.6875, + "completions/mean_terminated_length": 1176.6363525390625, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.34383595898974745, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.115403241873455, + "kl": 0.007472991943359375, + "learning_rate": 8.469209006231824e-07, + "loss": -0.0282, + "num_tokens": 62074622.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.062798023223877, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015550227440352362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06800918272892945, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.062063289083417544, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1032.0, + "completions/max_terminated_length": 1032.0, + "completions/mean_length": 888.3125, + "completions/mean_terminated_length": 888.3125, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.34408602150537637, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7483840495693883, + "kl": 0.0147857666015625, + "learning_rate": 8.46625623610657e-07, + "loss": 0.0118, + "num_tokens": 62116523.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9447619915008545, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027101258771083783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0707279678102323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1210.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 839.6875, + "completions/mean_terminated_length": 839.6875, + "completions/min_length": 472.0, + "completions/min_terminated_length": 472.0, + "epoch": 0.34433608402100524, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9226179170743203, + "kl": 0.0104217529296875, + "learning_rate": 8.463301205792675e-07, + "loss": 0.0494, + "num_tokens": 62146206.0, + "reward": 0.0, + "reward_std": 0.7024710178375244, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050805652250072754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04215057067721547, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1060.375, + "completions/mean_terminated_length": 1031.0667724609375, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.34458614653663416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.053735643997246, + "kl": 0.01056671142578125, + "learning_rate": 8.460343917541766e-07, + "loss": -0.0714, + "num_tokens": 62176340.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0456726551055908, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005637343563551762, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04572486192531898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1235.0625, + "completions/mean_terminated_length": 1235.0625, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.3448362090522631, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3217733509884275, + "kl": 0.00865936279296875, + "learning_rate": 8.457384373607203e-07, + "loss": -0.023, + "num_tokens": 62213253.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.38112443685531616, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040101423432967166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09719070458757174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05163977794943225, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1002.4375, + "completions/mean_terminated_length": 1002.4375, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.34508627156789196, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.476394031627589, + "kl": 0.01483154296875, + "learning_rate": 8.454422576244059e-07, + "loss": -0.0013, + "num_tokens": 62265692.0, + "reward": 0.0, + "reward_std": 0.6387931108474731, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.17671823636767237, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2122131503550537, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1145.4375, + "completions/mean_terminated_length": 1094.7857666015625, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.3453363340835209, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4106056331978736, + "kl": 0.006572723388671875, + "learning_rate": 8.451458527709126e-07, + "loss": 0.0107, + "num_tokens": 62312795.0, + "reward": 0.0, + "reward_std": 0.5408207178115845, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15783555048656708, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26584127324632095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1142.25, + "completions/mean_terminated_length": 1059.6923828125, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.3455863965991498, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.751765942227762, + "kl": 0.0087432861328125, + "learning_rate": 8.448492230260909e-07, + "loss": -0.0335, + "num_tokens": 62365735.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0680699348449707, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21059537627224115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.6975162260811915, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16815997674172586, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1263.875, + "completions/mean_terminated_length": 1248.1334228515625, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.34583645911477867, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.046781687539622, + "kl": 0.0130767822265625, + "learning_rate": 8.445523686159629e-07, + "loss": -0.0309, + "num_tokens": 62408941.0, + "reward": 0.0, + "reward_std": 0.7628606557846069, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0038048222520865004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08603529809024699, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1046.6875, + "completions/mean_terminated_length": 1046.6875, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.3460865216304076, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.817914222895259, + "kl": 0.00885009765625, + "learning_rate": 8.442552897667218e-07, + "loss": 0.0027, + "num_tokens": 62442472.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0344934463500977, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01622394281034219, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06716545727353398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1300.9375, + "completions/mean_terminated_length": 1210.45458984375, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3463365841460365, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6283126842839724, + "kl": 0.00928497314453125, + "learning_rate": 8.439579867047316e-07, + "loss": -0.0078, + "num_tokens": 62498615.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.906869649887085, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1084614662816697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19793487677411106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15962919996504865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1053.25, + "completions/mean_terminated_length": 1053.25, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.34658664666166544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7623185813837727, + "kl": 0.0146942138671875, + "learning_rate": 8.436604596565277e-07, + "loss": -0.0598, + "num_tokens": 62541475.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9486637115478516, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06094744403194562, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16534148272134358, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17548873552537866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1416.625, + "completions/mean_terminated_length": 1233.2000732421875, + "completions/min_length": 1036.0, + "completions/min_terminated_length": 1036.0, + "epoch": 0.3468367091772943, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.739987692480355, + "kl": 0.0120086669921875, + "learning_rate": 8.433627088488156e-07, + "loss": 0.023, + "num_tokens": 62600997.0, + "reward": 0.0, + "reward_std": 0.4198510944843292, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14976617710819964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4902563860787563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1315.125, + "completions/mean_terminated_length": 1204.2000732421875, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.34708677169292323, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9662883537503275, + "kl": 0.00682830810546875, + "learning_rate": 8.430647345084715e-07, + "loss": -0.005, + "num_tokens": 62654671.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8772788643836975, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06744909697396431, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11444420059974887, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1301.0, + "completions/mean_terminated_length": 1181.5999755859375, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.34733683420855216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1605267416896865, + "kl": 0.01263427734375, + "learning_rate": 8.427665368625421e-07, + "loss": -0.0114, + "num_tokens": 62699719.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9423556327819824, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05485011483260446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1445509065570477, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1275.6875, + "completions/mean_terminated_length": 1243.6429443359375, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.347586896724181, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8241027947132973, + "kl": 0.0098876953125, + "learning_rate": 8.42468116138244e-07, + "loss": -0.0285, + "num_tokens": 62752442.0, + "reward": 0.0, + "reward_std": 0.8015128374099731, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07257026387453606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0803725566641226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1212.125, + "completions/mean_terminated_length": 1192.933349609375, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.34783695923980995, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8784510444868436, + "kl": 0.010528564453125, + "learning_rate": 8.421694725629638e-07, + "loss": -0.0031, + "num_tokens": 62799420.0, + "reward": 0.0, + "reward_std": 0.8792747259140015, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0017136252103199748, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05922440473059868, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1220.5625, + "completions/mean_terminated_length": 1127.416748046875, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.3480870217554389, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0277159370070414, + "kl": 0.009429931640625, + "learning_rate": 8.41870606364258e-07, + "loss": 0.0115, + "num_tokens": 62847133.0, + "reward": 0.0, + "reward_std": 0.7778089046478271, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0606212864564622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06888266028786393, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1136.1875, + "completions/mean_terminated_length": 1111.933349609375, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.34833708427106774, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8348258581229704, + "kl": 0.011444091796875, + "learning_rate": 8.415715177698529e-07, + "loss": 0.0157, + "num_tokens": 62879344.0, + "reward": 0.0, + "reward_std": 0.9490658640861511, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0749717194191582, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07538715888577682, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1349.9375, + "completions/mean_terminated_length": 1328.5, + "completions/min_length": 1149.0, + "completions/min_terminated_length": 1149.0, + "epoch": 0.34858714678669667, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1263933370816974, + "kl": 0.0117034912109375, + "learning_rate": 8.412722070076438e-07, + "loss": -0.0286, + "num_tokens": 62919479.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9168092012405396, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1165.3125, + "completions/mean_terminated_length": 1143.0, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.3488372093023256, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4586634068445785, + "kl": 0.0137939453125, + "learning_rate": 8.409726743056957e-07, + "loss": -0.0165, + "num_tokens": 62965204.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8458037376403809, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014163322459595836, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18633306318050205, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1047.5625, + "completions/mean_terminated_length": 1017.4000244140625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.3490872718179545, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0224656357880684, + "kl": 0.01103973388671875, + "learning_rate": 8.406729198922426e-07, + "loss": -0.0286, + "num_tokens": 63013477.0, + "reward": 0.0, + "reward_std": 0.6933915615081787, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11789831973908821, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1364450510157611, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12758439472669758, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1146.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 939.5, + "completions/mean_terminated_length": 939.5, + "completions/min_length": 220.0, + "completions/min_terminated_length": 220.0, + "epoch": 0.3493373343335834, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0895862120798974, + "kl": 0.00936126708984375, + "learning_rate": 8.403729439956872e-07, + "loss": -0.0067, + "num_tokens": 63063141.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4159996509552002, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11134996275156225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1404583974658499, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19007795671678931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1267.9375, + "completions/mean_terminated_length": 1214.3846435546875, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.3495873968492123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1916428139076425, + "kl": 0.0142974853515625, + "learning_rate": 8.400727468446014e-07, + "loss": -0.0451, + "num_tokens": 63115460.0, + "reward": 0.0, + "reward_std": 0.8990896344184875, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00321851640634669, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05813322379148299, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1085.875, + "completions/mean_terminated_length": 1085.875, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.34983745936484123, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8305818929290893, + "kl": 0.013580322265625, + "learning_rate": 8.397723286677253e-07, + "loss": -0.0022, + "num_tokens": 63151514.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8354265689849854, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01857045590646101, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08104275657760371, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1301.9375, + "completions/mean_terminated_length": 1211.9091796875, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.3500875218804701, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.014260009221193, + "kl": 0.0098419189453125, + "learning_rate": 8.394716896939676e-07, + "loss": -0.0381, + "num_tokens": 63204929.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.016596794128418, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010199382883629387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047779405697486226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1322.6875, + "completions/mean_terminated_length": 1310.86669921875, + "completions/min_length": 1084.0, + "completions/min_terminated_length": 1084.0, + "epoch": 0.350337584396099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6833575530788654, + "kl": 0.008453369140625, + "learning_rate": 8.391708301524051e-07, + "loss": 0.0112, + "num_tokens": 63249980.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9459744095802307, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04074506437310606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04996835014437639, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1169.75, + "completions/mean_terminated_length": 971.6000366210938, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.35058764691172795, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6803402338943467, + "kl": 0.00658416748046875, + "learning_rate": 8.388697502722831e-07, + "loss": 0.0329, + "num_tokens": 63289584.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.35422903299331665, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1215634643093306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1479891042404967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.23913424093003743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1191.5625, + "completions/mean_terminated_length": 1171.0001220703125, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.3508377094273568, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6760822060114453, + "kl": 0.0076751708984375, + "learning_rate": 8.38568450283014e-07, + "loss": -0.0119, + "num_tokens": 63333417.0, + "reward": 0.0, + "reward_std": 0.639259934425354, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.21640642105607488, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2644054011278265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1106.0, + "completions/mean_length": 915.0625, + "completions/mean_terminated_length": 876.0667114257812, + "completions/min_length": 464.0, + "completions/min_terminated_length": 464.0, + "epoch": 0.35108777194298574, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0331045747013143, + "kl": 0.0088043212890625, + "learning_rate": 8.382669304141789e-07, + "loss": 0.0536, + "num_tokens": 63377762.0, + "reward": 0.0, + "reward_std": 0.9753798246383667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09190823685657186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1485965741733891, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1297.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1044.9375, + "completions/mean_terminated_length": 1044.9375, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.35133783445861466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5394768342656513, + "kl": 0.009857177734375, + "learning_rate": 8.379651908955254e-07, + "loss": 0.0421, + "num_tokens": 63418089.0, + "reward": 0.0, + "reward_std": 0.6665937304496765, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017653579900542612, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04916031347690977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1476.0625, + "completions/mean_terminated_length": 1423.4000244140625, + "completions/min_length": 1286.0, + "completions/min_terminated_length": 1286.0, + "epoch": 0.3515878969742436, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6639013191779637, + "kl": 0.0096588134765625, + "learning_rate": 8.376632319569692e-07, + "loss": 0.0202, + "num_tokens": 63483586.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9776710867881775, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008389517971865757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03671897927364567, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792296, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1363.8125, + "completions/mean_terminated_length": 1257.888916015625, + "completions/min_length": 1149.0, + "completions/min_terminated_length": 1149.0, + "epoch": 0.35183795948987245, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6072912984436747, + "kl": 0.009613037109375, + "learning_rate": 8.373610538285931e-07, + "loss": 0.0529, + "num_tokens": 63540375.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9803466796875, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09932895680400639, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11646905812106126, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17363222093882275, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1069.0, + "completions/mean_terminated_length": 1007.4285888671875, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.3520880220055014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3435146284028807, + "kl": 0.010711669921875, + "learning_rate": 8.370586567406466e-07, + "loss": 0.0133, + "num_tokens": 63578423.0, + "reward": 0.0, + "reward_std": 0.9725617170333862, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0123075134859164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034247597874148966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1350.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1101.0, + "completions/mean_terminated_length": 1101.0, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.3523380845211303, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.644887960491874, + "kl": 0.008270263671875, + "learning_rate": 8.367560409235465e-07, + "loss": -0.0005, + "num_tokens": 63621639.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6784014701843262, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05954176097058973, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07650819137936059, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1273.0625, + "completions/mean_terminated_length": 1220.6923828125, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.35258814703675917, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3509584074207073, + "kl": 0.0128326416015625, + "learning_rate": 8.364532066078755e-07, + "loss": -0.02, + "num_tokens": 63672664.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0616716146469116, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00407880447794377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06292808279698266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1079.9375, + "completions/mean_terminated_length": 1079.9375, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.3528382095523881, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9007072842592376, + "kl": 0.0097808837890625, + "learning_rate": 8.361501540243837e-07, + "loss": 0.0025, + "num_tokens": 63708935.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8188549280166626, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019169312099119963, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11313423238487423, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1180.6875, + "completions/mean_terminated_length": 1159.4000244140625, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.353088272068017, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6279690754448106, + "kl": 0.01434326171875, + "learning_rate": 8.358468834039869e-07, + "loss": 0.0082, + "num_tokens": 63755850.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.062549114227295, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04216103962541417, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12814997146330054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784837, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1228.0, + "completions/max_terminated_length": 1228.0, + "completions/mean_length": 976.4375, + "completions/mean_terminated_length": 976.4375, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.3533383345836459, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0913096337783674, + "kl": 0.0102996826171875, + "learning_rate": 8.355433949777674e-07, + "loss": 0.0094, + "num_tokens": 63791441.0, + "reward": 0.0, + "reward_std": 0.7100746035575867, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.039759523494895185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16707531754708108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1089.875, + "completions/mean_terminated_length": 1031.2857666015625, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.3535883970992748, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3083428151770704, + "kl": 0.0123138427734375, + "learning_rate": 8.352396889769731e-07, + "loss": -0.0478, + "num_tokens": 63843487.0, + "reward": 0.0, + "reward_std": 0.5548100471496582, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0020816519641501576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17643003992362857, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 986.625, + "completions/mean_terminated_length": 986.625, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.35383845961490373, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7474365844578372, + "kl": 0.0149078369140625, + "learning_rate": 8.349357656330182e-07, + "loss": -0.051, + "num_tokens": 63878881.0, + "reward": 0.0, + "reward_std": 0.42258012294769287, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06884884997619202, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19069887325942927, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1414.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 887.75, + "completions/mean_terminated_length": 887.75, + "completions/min_length": 478.0, + "completions/min_terminated_length": 478.0, + "epoch": 0.35408852213053266, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8811381906874294, + "kl": 0.015350341796875, + "learning_rate": 8.346316251774817e-07, + "loss": -0.0026, + "num_tokens": 63911901.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0121289491653442, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043046705002298034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08656224650204744, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1255.375, + "completions/mean_terminated_length": 1220.4285888671875, + "completions/min_length": 1020.0, + "completions/min_terminated_length": 1020.0, + "epoch": 0.3543385846461615, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.538585709961367, + "kl": 0.00826263427734375, + "learning_rate": 8.34327267842109e-07, + "loss": -0.0008, + "num_tokens": 63963747.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.017878532409668, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19689041713706484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.317608485820128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1520233900132184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1222.3125, + "completions/mean_terminated_length": 1158.2308349609375, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.35458864716179045, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.313037938410671, + "kl": 0.013427734375, + "learning_rate": 8.340226938588098e-07, + "loss": -0.0169, + "num_tokens": 64015160.0, + "reward": 0.0, + "reward_std": 0.8891949653625488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05313899248937659, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08228540046271833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1082.125, + "completions/mean_terminated_length": 1082.125, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.3548387096774194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4156111360606562, + "kl": 0.0127716064453125, + "learning_rate": 8.337179034596598e-07, + "loss": -0.0057, + "num_tokens": 64060410.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8986164331436157, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0004695707288395358, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0018782829153581432, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1271.625, + "completions/mean_terminated_length": 1218.923095703125, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.35508877219304824, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6055278894282776, + "kl": 0.00804901123046875, + "learning_rate": 8.334128968768987e-07, + "loss": 0.0317, + "num_tokens": 64113460.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0472326278686523, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05444124695421547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21712959929183825, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1172.3125, + "completions/mean_terminated_length": 1150.4666748046875, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.35533883470867716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.167788718331849, + "kl": 0.0126800537109375, + "learning_rate": 8.331076743429319e-07, + "loss": 0.0541, + "num_tokens": 64143129.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0336756706237793, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002760872294587036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03426819092797175, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1131.5, + "completions/mean_terminated_length": 1078.857177734375, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.3555888972243061, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5733456870821203, + "kl": 0.01727294921875, + "learning_rate": 8.328022360903284e-07, + "loss": -0.0115, + "num_tokens": 64187009.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9005129337310791, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028088058999262883, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04560113051828157, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1271.9375, + "completions/mean_terminated_length": 1195.916748046875, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.35583895973993496, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1431274180968467, + "kl": 0.015167236328125, + "learning_rate": 8.324965823518222e-07, + "loss": 0.0211, + "num_tokens": 64243520.0, + "reward": -3.725290298461914e-09, + "reward_std": 0.9761375188827515, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.033548536599983685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08574434963282675, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1174.5625, + "completions/mean_terminated_length": 1099.4615478515625, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.3560890222555639, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.402479590662355, + "kl": 0.0099029541015625, + "learning_rate": 8.321907133603111e-07, + "loss": -0.0162, + "num_tokens": 64284921.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.975763738155365, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0022920745724567685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04772839132403494, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1361.4375, + "completions/mean_terminated_length": 1253.6666259765625, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.3563390847711928, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.864507925021821, + "kl": 0.0059814453125, + "learning_rate": 8.318846293488574e-07, + "loss": -0.0045, + "num_tokens": 64339768.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9175720810890198, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0564110938880618, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08995111674399459, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1204.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 955.8125, + "completions/mean_terminated_length": 955.8125, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.35658914728682173, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4521445539025355, + "kl": 0.005420684814453125, + "learning_rate": 8.315783305506867e-07, + "loss": 0.0129, + "num_tokens": 64386157.0, + "reward": 0.0, + "reward_std": 0.9826600551605225, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02192938213888186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06359996632041727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1223.875, + "completions/mean_terminated_length": 1205.4666748046875, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.3568392098024506, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0649934501122917, + "kl": 0.0129241943359375, + "learning_rate": 8.312718171991886e-07, + "loss": 0.0381, + "num_tokens": 64431827.0, + "reward": 0.0, + "reward_std": 0.988744854927063, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3564471041723391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3847151469233825, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1153.0625, + "completions/mean_terminated_length": 1129.933349609375, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.3570892723180795, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.597148313602462, + "kl": 0.009765625, + "learning_rate": 8.30965089527916e-07, + "loss": -0.0203, + "num_tokens": 64482132.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.034332513809204, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14604256947297814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11214037051992685, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1107.8125, + "completions/mean_terminated_length": 929.5454711914062, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.35733933483370844, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.166109369955294, + "kl": 0.00629425048828125, + "learning_rate": 8.306581477705853e-07, + "loss": -0.0167, + "num_tokens": 64522881.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9185409545898438, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021405224755129185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06836034659628237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1032.0, + "completions/mean_terminated_length": 1032.0, + "completions/min_length": 606.0, + "completions/min_terminated_length": 606.0, + "epoch": 0.3575893973493373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.349045496301345, + "kl": 0.00574493408203125, + "learning_rate": 8.303509921610759e-07, + "loss": -0.0278, + "num_tokens": 64557769.0, + "reward": 0.0, + "reward_std": 0.6116563081741333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05600738180821054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18885544450400735, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1220.1875, + "completions/mean_terminated_length": 1155.615478515625, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.35783945986496624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9347703489302277, + "kl": 0.011077880859375, + "learning_rate": 8.300436229334301e-07, + "loss": -0.0256, + "num_tokens": 64607332.0, + "reward": 0.0, + "reward_std": 0.6322227716445923, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0791943836598727, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09495431334767776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1129.5625, + "completions/mean_terminated_length": 961.1818237304688, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.35808952238059516, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.425942061730681, + "kl": 0.005184173583984375, + "learning_rate": 8.297360403218531e-07, + "loss": 0.0305, + "num_tokens": 64648429.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8759142160415649, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011920055443797133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052098258431193756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042253, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1119.125, + "completions/mean_terminated_length": 1119.125, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.3583395848962241, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2785675132618577, + "kl": 0.00800323486328125, + "learning_rate": 8.294282445607124e-07, + "loss": -0.0138, + "num_tokens": 64699503.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.967159628868103, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10157629944082656, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12008326091180696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1103.1875, + "completions/mean_terminated_length": 1046.5, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.35858964741185295, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.360245395494562, + "kl": 0.009179115295410156, + "learning_rate": 8.291202358845385e-07, + "loss": -0.0491, + "num_tokens": 64741266.0, + "reward": 0.0, + "reward_std": 0.3042250871658325, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18281323059719468, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22939818278267762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1242.125, + "completions/mean_terminated_length": 1124.9091796875, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.3588397099274819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.181283976244887, + "kl": 0.0138702392578125, + "learning_rate": 8.288120145280236e-07, + "loss": -0.0091, + "num_tokens": 64790604.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0599753856658936, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005233597697946433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10397087249069512, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14580555290954889, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1134.625, + "completions/mean_terminated_length": 850.4444580078125, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.3590897724431108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1814230068778975, + "kl": 0.0091705322265625, + "learning_rate": 8.285035807260221e-07, + "loss": 0.0511, + "num_tokens": 64830702.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0194575786590576, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009657040521922823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06332690631377433, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1265.75, + "completions/mean_terminated_length": 1232.2857666015625, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.35933983495873967, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.334499907716662, + "kl": 0.010040283203125, + "learning_rate": 8.281949347135504e-07, + "loss": -0.0006, + "num_tokens": 64871226.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.794493556022644, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07001490977060987, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09283433478641878, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1234.0, + "completions/mean_terminated_length": 1216.2667236328125, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.3595898974743686, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9360745298280047, + "kl": 0.0112762451171875, + "learning_rate": 8.278860767257864e-07, + "loss": -0.0321, + "num_tokens": 64916418.0, + "reward": 0.0, + "reward_std": 0.7654714584350586, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04412153006633763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03321721870022851, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 988.25, + "completions/mean_terminated_length": 954.1333618164062, + "completions/min_length": 611.0, + "completions/min_terminated_length": 611.0, + "epoch": 0.3598399599899975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5700679644605047, + "kl": 0.0174560546875, + "learning_rate": 8.275770069980697e-07, + "loss": -0.022, + "num_tokens": 64959206.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0384268760681152, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0724044901980166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11127898963518723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1417.5625, + "completions/mean_terminated_length": 1380.0909423828125, + "completions/min_length": 1123.0, + "completions/min_terminated_length": 1123.0, + "epoch": 0.3600900225056264, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.672880263424142, + "kl": 0.0112152099609375, + "learning_rate": 8.272677257659009e-07, + "loss": 0.0105, + "num_tokens": 65017815.0, + "reward": 0.0, + "reward_std": 0.9780192375183105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05696435780049319, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05505248152677053, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 998.375, + "completions/mean_terminated_length": 998.375, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.3603400850212553, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1129541968181984, + "kl": 0.0112152099609375, + "learning_rate": 8.269582332649423e-07, + "loss": -0.0114, + "num_tokens": 65056893.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0046532154083252, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14847390428897073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17213915341306768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1260.3125, + "completions/mean_terminated_length": 1205.0, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.36059014753688423, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.349623899732544, + "kl": 0.0134124755859375, + "learning_rate": 8.266485297310169e-07, + "loss": 0.0001, + "num_tokens": 65116858.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8296217918395996, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012615710136212665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04324749349458741, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.107496769977314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 978.625, + "completions/mean_terminated_length": 978.625, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.36084021005251316, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8407759946970277, + "kl": 0.0081024169921875, + "learning_rate": 8.263386154001084e-07, + "loss": -0.0109, + "num_tokens": 65157108.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7655435800552368, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06613936364587766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09308979230725113, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1242.25, + "completions/mean_terminated_length": 1087.5999755859375, + "completions/min_length": 432.0, + "completions/min_terminated_length": 432.0, + "epoch": 0.361090272568142, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0080518159823915, + "kl": 0.0130767822265625, + "learning_rate": 8.260284905083612e-07, + "loss": 0.0071, + "num_tokens": 65221664.0, + "reward": 0.0, + "reward_std": 0.996924638748169, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004729342803718804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18479198378369324, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0807373427759331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1118.0, + "completions/max_terminated_length": 1118.0, + "completions/mean_length": 954.375, + "completions/mean_terminated_length": 954.375, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.36134033508377095, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.76489702214252, + "kl": 0.0108184814453125, + "learning_rate": 8.257181552920803e-07, + "loss": -0.0393, + "num_tokens": 65261806.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0057706832885742, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02282203329805966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07266238973236112, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1250.75, + "completions/mean_terminated_length": 1234.1334228515625, + "completions/min_length": 1052.0, + "completions/min_terminated_length": 1052.0, + "epoch": 0.36159039759939987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.178844035187604, + "kl": 0.0145416259765625, + "learning_rate": 8.254076099877307e-07, + "loss": 0.0176, + "num_tokens": 65305562.0, + "reward": 0.0, + "reward_std": 0.784500241279602, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0950283844164577, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.6261626557027747, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 920.5625, + "completions/mean_terminated_length": 920.5625, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.36184046011502874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9740684681418164, + "kl": 0.01446533203125, + "learning_rate": 8.250968548319375e-07, + "loss": 0.0626, + "num_tokens": 65336155.0, + "reward": 0.0, + "reward_std": 1.044834852218628, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06734299678350525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06955154800699079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1134.3125, + "completions/mean_terminated_length": 1012.4166870117188, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.36209052263065766, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.37452146330244, + "kl": 0.006134033203125, + "learning_rate": 8.247858900614859e-07, + "loss": 0.0093, + "num_tokens": 65382832.0, + "reward": 0.0, + "reward_std": 0.6833464503288269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04500608437251381, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1402677440559909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1437.125, + "completions/mean_terminated_length": 1374.25, + "completions/min_length": 1215.0, + "completions/min_terminated_length": 1215.0, + "epoch": 0.3623405851462866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.928790633570917, + "kl": 0.0098724365234375, + "learning_rate": 8.244747159133209e-07, + "loss": 0.01, + "num_tokens": 65438154.0, + "reward": 0.0, + "reward_std": 0.7756710052490234, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.048491663130788495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09858554351944224, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452274, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1291.0, + "completions/mean_terminated_length": 1277.0667724609375, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.36259064766191546, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0000104869755777, + "kl": 0.0146026611328125, + "learning_rate": 8.241633326245465e-07, + "loss": -0.0189, + "num_tokens": 65492026.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9726828336715698, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009325547672413642, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08749514573579648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1263.4375, + "completions/mean_terminated_length": 1121.5, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.3628407101775444, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.721739458535426, + "kl": 0.01092529296875, + "learning_rate": 8.238517404324268e-07, + "loss": -0.0289, + "num_tokens": 65529505.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7049499154090881, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10486709589939128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0929020941589751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1368.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1012.9375, + "completions/mean_terminated_length": 1012.9375, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.3630907726931733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.749456610795307, + "kl": 0.01373291015625, + "learning_rate": 8.235399395743842e-07, + "loss": 0.0018, + "num_tokens": 65562136.0, + "reward": 0.0, + "reward_std": 1.0451421737670898, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030768954128493623, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05125185410889866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1224.0625, + "completions/mean_terminated_length": 1224.0625, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.3633408352088022, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4561817056207405, + "kl": 0.01739501953125, + "learning_rate": 8.232279302880012e-07, + "loss": -0.0121, + "num_tokens": 65601353.0, + "reward": 0.0, + "reward_std": 0.9092826843261719, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.260601789171338, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.35169633555768964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1019.875, + "completions/mean_terminated_length": 1019.875, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.3635908977244311, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0506289215178204, + "kl": 0.010498046875, + "learning_rate": 8.22915712811018e-07, + "loss": -0.0219, + "num_tokens": 65655671.0, + "reward": 0.0, + "reward_std": 0.6791718602180481, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08254699828539773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1205785835810969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1800720020600813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1097.75, + "completions/mean_terminated_length": 1040.2857666015625, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.36384096024006, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.184444611329402, + "kl": 0.0143280029296875, + "learning_rate": 8.226032873813343e-07, + "loss": -0.0116, + "num_tokens": 65696323.0, + "reward": 0.0, + "reward_std": 0.856545090675354, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004600382210902765, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07575548462506812, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1198.9375, + "completions/mean_terminated_length": 1198.9375, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.36409102275568894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6598205618428503, + "kl": 0.0081787109375, + "learning_rate": 8.222906542370076e-07, + "loss": -0.0279, + "num_tokens": 65736466.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0292458534240723, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06350595792807404, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.108488820520782, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1091.4375, + "completions/mean_terminated_length": 1064.2000732421875, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.3643410852713178, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.798792628398827, + "kl": 0.00902557373046875, + "learning_rate": 8.21977813616254e-07, + "loss": 0.0156, + "num_tokens": 65772825.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8339080214500427, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006272297104600367, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041926128175689546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1167.9375, + "completions/mean_terminated_length": 1145.800048828125, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.36459114778694673, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.15124887977143, + "kl": 0.0142822265625, + "learning_rate": 8.216647657574478e-07, + "loss": -0.0512, + "num_tokens": 65812072.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0648953914642334, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016848957766204153, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07844489154186499, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 983.5625, + "completions/mean_terminated_length": 983.5625, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.36484121030257566, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4609262492605697, + "kl": 0.008525848388671875, + "learning_rate": 8.21351510899121e-07, + "loss": 0.0363, + "num_tokens": 65853065.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6679079532623291, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03996610096575941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09515390848948463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1057.5625, + "completions/mean_terminated_length": 1057.5625, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.3650912728182045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.807596531593123, + "kl": 0.012664794921875, + "learning_rate": 8.210380492799636e-07, + "loss": 0.0072, + "num_tokens": 65895826.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7012479305267334, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.23784507604769084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31670098445797196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1179.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 1024.5625, + "completions/mean_terminated_length": 1024.5625, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.36534133533383345, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8964901965960332, + "kl": 0.01568603515625, + "learning_rate": 8.207243811388223e-07, + "loss": -0.0196, + "num_tokens": 65936139.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.8754912614822388, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021999214385735495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12117255641811589, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1094.5, + "completions/mean_terminated_length": 1036.571533203125, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.3655913978494624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.486861027726057, + "kl": 0.009765625, + "learning_rate": 8.204105067147025e-07, + "loss": -0.0064, + "num_tokens": 65975027.0, + "reward": 0.0, + "reward_std": 0.8863747119903564, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1013065481441292, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10615635111851703, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1441.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1084.5, + "completions/mean_terminated_length": 1084.5, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.3658414603650913, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0701241853885897, + "kl": 0.0131378173828125, + "learning_rate": 8.200964262467656e-07, + "loss": -0.0176, + "num_tokens": 66019219.0, + "reward": 0.0, + "reward_std": 0.8863440752029419, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07914455671627105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09781532160809377, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1262.0, + "completions/mean_terminated_length": 1228.0, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.36609152288072017, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.927176979347193, + "kl": 0.0101318359375, + "learning_rate": 8.197821399743309e-07, + "loss": -0.0159, + "num_tokens": 66057227.0, + "reward": 0.0, + "reward_std": 1.0157487392425537, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10975484896235452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14255764434985752, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1213.0, + "completions/mean_terminated_length": 1193.86669921875, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.3663415853963491, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3775762635753717, + "kl": 0.015838623046875, + "learning_rate": 8.194676481368737e-07, + "loss": 0.0186, + "num_tokens": 66101595.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9426479935646057, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0010942154887208849, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05144610691388416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1107.5625, + "completions/mean_terminated_length": 1081.4000244140625, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.366591647911978, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3182467937938815, + "kl": 0.0146026611328125, + "learning_rate": 8.191529509740265e-07, + "loss": -0.005, + "num_tokens": 66147252.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0074886083602905, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03187341993382932, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09322471583748318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1076.75, + "completions/mean_terminated_length": 1076.75, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.3668417104276069, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8490245460732484, + "kl": 0.0184478759765625, + "learning_rate": 8.188380487255781e-07, + "loss": -0.0768, + "num_tokens": 66199192.0, + "reward": -4.0978193283081055e-08, + "reward_std": 0.9600286483764648, + "rewards/wordcountpos_reward_GEOBench/mean": -4.0978193283081055e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09627809576008622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07892267560220449, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 903.875, + "completions/mean_terminated_length": 864.1333618164062, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.3670917729432358, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1402013671363216, + "kl": 0.0086669921875, + "learning_rate": 8.185229416314735e-07, + "loss": -0.0602, + "num_tokens": 66238294.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7088519930839539, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08629016546108055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056780292183652784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1342.375, + "completions/mean_terminated_length": 1306.0, + "completions/min_length": 1095.0, + "completions/min_terminated_length": 1095.0, + "epoch": 0.36734183545886473, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0738998664536084, + "kl": 0.0114288330078125, + "learning_rate": 8.182076299318138e-07, + "loss": -0.0017, + "num_tokens": 66289876.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8276550769805908, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03848577731168483, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10823681210584378, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1131.75, + "completions/mean_terminated_length": 1009.0, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.3675918979744936, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8377278864684463, + "kl": 0.004169464111328125, + "learning_rate": 8.178921138668561e-07, + "loss": -0.0595, + "num_tokens": 66323696.0, + "reward": 0.0, + "reward_std": 0.6343519687652588, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02768569789939282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053669028521047206, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1304.375, + "completions/mean_terminated_length": 1259.2308349609375, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.3678419604901225, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6847224457331103, + "kl": 0.00899505615234375, + "learning_rate": 8.175763936770131e-07, + "loss": -0.0238, + "num_tokens": 66375766.0, + "reward": 0.0, + "reward_std": 0.38029903173446655, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06510170448450779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1156483409153016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1012.0, + "completions/mean_length": 1111.5625, + "completions/mean_terminated_length": 723.125, + "completions/min_length": 494.0, + "completions/min_terminated_length": 494.0, + "epoch": 0.36809202300575145, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8069266552857353, + "kl": 0.0065460205078125, + "learning_rate": 8.172604696028531e-07, + "loss": -0.0262, + "num_tokens": 66410511.0, + "reward": 0.0, + "reward_std": 0.8819104433059692, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1498032591239151, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09279992006368025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1117.5625, + "completions/mean_terminated_length": 1029.3077392578125, + "completions/min_length": 570.0, + "completions/min_terminated_length": 570.0, + "epoch": 0.36834208552138037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.644968635416195, + "kl": 0.0078125, + "learning_rate": 8.169443418850998e-07, + "loss": -0.0486, + "num_tokens": 66457152.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9662089943885803, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0875109971748991, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1344026072124897, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06070572613176774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1356.5625, + "completions/mean_terminated_length": 1291.3636474609375, + "completions/min_length": 1114.0, + "completions/min_terminated_length": 1114.0, + "epoch": 0.36859214803700924, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7015097364510945, + "kl": 0.0101318359375, + "learning_rate": 8.166280107646319e-07, + "loss": -0.0491, + "num_tokens": 66519609.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0009000301361084, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01923584796023682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061773874636499033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1352.6875, + "completions/mean_terminated_length": 1285.727294921875, + "completions/min_length": 1153.0, + "completions/min_terminated_length": 1153.0, + "epoch": 0.36884221055263816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.98400679837669, + "kl": 0.0141143798828125, + "learning_rate": 8.163114764824833e-07, + "loss": 0.0129, + "num_tokens": 66566628.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.924269437789917, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04650101178648085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16028170734609917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1493.125, + "completions/mean_terminated_length": 1390.0, + "completions/min_length": 1390.0, + "completions/min_terminated_length": 1390.0, + "epoch": 0.3690922730682671, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.226482552192988, + "kl": 0.008575439453125, + "learning_rate": 8.159947392798425e-07, + "loss": 0.0027, + "num_tokens": 66631718.0, + "reward": 0.0, + "reward_std": 0.8646396398544312, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3163578016574553, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3220599813246726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1114.125, + "completions/mean_terminated_length": 1088.4000244140625, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.36934233558389595, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7743134403337577, + "kl": 0.00928497314453125, + "learning_rate": 8.15677799398053e-07, + "loss": -0.0149, + "num_tokens": 66669136.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9292417168617249, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05043978033854209, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0826362088876191, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1424.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1080.3125, + "completions/mean_terminated_length": 1080.3125, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.3695923980995249, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.07967792226581, + "kl": 0.0126800537109375, + "learning_rate": 8.153606570786121e-07, + "loss": -0.0504, + "num_tokens": 66702965.0, + "reward": 0.0, + "reward_std": 1.025867223739624, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06770092739403925, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11146552689690634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1153.3125, + "completions/mean_terminated_length": 1073.3077392578125, + "completions/min_length": 259.0, + "completions/min_terminated_length": 259.0, + "epoch": 0.3698424606151538, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6880141072941703, + "kl": 0.0138092041015625, + "learning_rate": 8.150433125631718e-07, + "loss": -0.1071, + "num_tokens": 66759026.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9229059219360352, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07954707822066091, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05587452670385202, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1322.8125, + "completions/mean_terminated_length": 1281.923095703125, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.37009252313078267, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.034012616848547, + "kl": 0.014892578125, + "learning_rate": 8.147257660935383e-07, + "loss": -0.0091, + "num_tokens": 66806191.0, + "reward": 0.0, + "reward_std": 0.9798064231872559, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10045901164061402, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14933985050765805, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 1236.375, + "completions/mean_terminated_length": 972.75, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.3703425856464116, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5409644529927524, + "kl": 0.010467529296875, + "learning_rate": 8.14408017911671e-07, + "loss": -0.0033, + "num_tokens": 66856925.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7890657186508179, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2556169008496895, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13109227736669, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1311.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 967.3125, + "completions/mean_terminated_length": 967.3125, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.3705926481620405, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.159707302375429, + "kl": 0.00733184814453125, + "learning_rate": 8.140900682596837e-07, + "loss": -0.0301, + "num_tokens": 66896074.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.958022952079773, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08535164512684083, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10495278603660473, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1348.1875, + "completions/mean_terminated_length": 1297.5833740234375, + "completions/min_length": 1094.0, + "completions/min_terminated_length": 1094.0, + "epoch": 0.37084271067766944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1200896833055696, + "kl": 0.0114593505859375, + "learning_rate": 8.137719173798436e-07, + "loss": 0.0191, + "num_tokens": 66935869.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0285968780517578, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03899332616435374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09667800228010007, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04791968589521743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1107.0625, + "completions/mean_terminated_length": 1080.86669921875, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.3710927731932983, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6085143018811023, + "kl": 0.0133056640625, + "learning_rate": 8.134535655145711e-07, + "loss": 0.001, + "num_tokens": 66976414.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0405633449554443, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01668086756562567, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02996964384376398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1208.25, + "completions/mean_terminated_length": 1111.0, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.37134283570892723, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7198793436497715, + "kl": 0.0116729736328125, + "learning_rate": 8.131350129064394e-07, + "loss": -0.0878, + "num_tokens": 67021890.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7507574558258057, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.050113302134811036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.038438496090298736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1265.75, + "completions/mean_terminated_length": 1159.272705078125, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.37159289822455616, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5467799047052453, + "kl": 0.01568603515625, + "learning_rate": 8.128162597981754e-07, + "loss": -0.0449, + "num_tokens": 67076062.0, + "reward": 0.0, + "reward_std": 0.9870882034301758, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10702113117278378, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1351059435634634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1248.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 969.375, + "completions/mean_terminated_length": 969.375, + "completions/min_length": 476.0, + "completions/min_terminated_length": 476.0, + "epoch": 0.371842960740185, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.192411464386482, + "kl": 0.01611328125, + "learning_rate": 8.124973064326582e-07, + "loss": 0.0281, + "num_tokens": 67126292.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0641274452209473, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08588414087241236, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09168779598711677, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1010.0, + "completions/mean_terminated_length": 1010.0, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.37209302325581395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9359519465305293, + "kl": 0.0154571533203125, + "learning_rate": 8.121781530529197e-07, + "loss": -0.0854, + "num_tokens": 67166444.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9828296303749084, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004410347234990945, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06537513238270655, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13102162671355697, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1024.5, + "completions/mean_terminated_length": 1024.5, + "completions/min_length": 557.0, + "completions/min_terminated_length": 557.0, + "epoch": 0.37234308577144287, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3583826484739094, + "kl": 0.0146942138671875, + "learning_rate": 8.118587999021438e-07, + "loss": -0.0412, + "num_tokens": 67203636.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0050976276397705, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043582074146309005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07939539718192079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1049.125, + "completions/mean_terminated_length": 698.4444580078125, + "completions/min_length": 314.0, + "completions/min_terminated_length": 314.0, + "epoch": 0.37259314828707174, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7042334023989043, + "kl": 0.01067352294921875, + "learning_rate": 8.115392472236675e-07, + "loss": -0.0082, + "num_tokens": 67240166.0, + "reward": 0.0, + "reward_std": 0.6294206380844116, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1446032892439471, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17899136193366913, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1085.0, + "completions/max_terminated_length": 1085.0, + "completions/mean_length": 890.4375, + "completions/mean_terminated_length": 890.4375, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.37284321080270066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.110970106402351, + "kl": 0.00811004638671875, + "learning_rate": 8.112194952609791e-07, + "loss": -0.0164, + "num_tokens": 67272917.0, + "reward": 0.0, + "reward_std": 0.49520978331565857, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03764818936440118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0673276592796193, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1121.375, + "completions/mean_terminated_length": 1121.375, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.3730932733183296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.618822980465436, + "kl": 0.0141143798828125, + "learning_rate": 8.10899544257719e-07, + "loss": -0.0186, + "num_tokens": 67320947.0, + "reward": 0.0, + "reward_std": 0.403276264667511, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.5565982857639202, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4318334254869241, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1394.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 952.125, + "completions/mean_terminated_length": 952.125, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.3733433358339585, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4806236775947808, + "kl": 0.01812744140625, + "learning_rate": 8.105793944576792e-07, + "loss": 0.0125, + "num_tokens": 67371613.0, + "reward": 0.0, + "reward_std": 0.7708301544189453, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05433399823060825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09403775287628156, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1144.8125, + "completions/mean_terminated_length": 1121.1334228515625, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.3735933983495874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2089583601088534, + "kl": 0.0136566162109375, + "learning_rate": 8.102590461048032e-07, + "loss": -0.023, + "num_tokens": 67412306.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9908801317214966, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06216505357891488, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09785577115996501, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1368.875, + "completions/mean_terminated_length": 1309.2728271484375, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.3738434608652163, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.460649304887129, + "kl": 0.0145263671875, + "learning_rate": 8.099384994431858e-07, + "loss": -0.0754, + "num_tokens": 67460680.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8378720283508301, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04279961361727562, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09854692388571741, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1299.375, + "completions/mean_terminated_length": 1270.71435546875, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.3740935233808452, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7534246952939903, + "kl": 0.0113067626953125, + "learning_rate": 8.09617754717073e-07, + "loss": 0.0135, + "num_tokens": 67502654.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5559778213500977, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06438180179899364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13941362445909422, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1276.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 1091.8125, + "completions/mean_terminated_length": 1091.8125, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.3743435858964741, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.156055738377159, + "kl": 0.0145721435546875, + "learning_rate": 8.092968121708615e-07, + "loss": -0.0479, + "num_tokens": 67543427.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8373007774353027, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16550460169206993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19651028438389576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1395.875, + "completions/mean_terminated_length": 1291.75, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.374593648412103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709712017376254, + "kl": 0.01324462890625, + "learning_rate": 8.089756720490986e-07, + "loss": 0.0479, + "num_tokens": 67601153.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9794222116470337, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04397759290929448, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10624840063864831, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1283.1875, + "completions/mean_terminated_length": 1210.916748046875, + "completions/min_length": 1028.0, + "completions/min_terminated_length": 1028.0, + "epoch": 0.37484371092773194, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0421288364777985, + "kl": 0.00801849365234375, + "learning_rate": 8.086543345964832e-07, + "loss": -0.0088, + "num_tokens": 67646628.0, + "reward": 0.0, + "reward_std": 0.7523441910743713, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004230870015823505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03686867938497357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1084.5625, + "completions/mean_terminated_length": 1084.5625, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.37509377344336087, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.251795525853553, + "kl": 0.0135345458984375, + "learning_rate": 8.083328000578629e-07, + "loss": 0.0533, + "num_tokens": 67696165.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0655055046081543, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024671821095989845, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1261924562144704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590968, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1097.1875, + "completions/mean_terminated_length": 1097.1875, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.37534383595898974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2334463273784357, + "kl": 0.011383056640625, + "learning_rate": 8.080110686782367e-07, + "loss": 0.0221, + "num_tokens": 67732552.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.9996165633201599, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0650167755210288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13535534392348658, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1373.625, + "completions/mean_terminated_length": 1211.1429443359375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.37559389847461866, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7307020718798576, + "kl": 0.0122222900390625, + "learning_rate": 8.076891407027532e-07, + "loss": 0.0076, + "num_tokens": 67778682.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0042811632156372, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3186788646888212, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1226036520035022, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1298.25, + "completions/mean_terminated_length": 1096.5, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.3758439609902476, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8400736759999248, + "kl": 0.008941650390625, + "learning_rate": 8.073670163767108e-07, + "loss": 0.0319, + "num_tokens": 67843486.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5219864845275879, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09414104471157134, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13567276591093033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1243.0, + "completions/mean_terminated_length": 1225.86669921875, + "completions/min_length": 1110.0, + "completions/min_terminated_length": 1110.0, + "epoch": 0.37609402350587645, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.348208006909992, + "kl": 0.00800323486328125, + "learning_rate": 8.070446959455576e-07, + "loss": -0.0108, + "num_tokens": 67887246.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8852283954620361, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00027177319072115404, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08869963980569558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1236.0, + "completions/max_terminated_length": 1236.0, + "completions/mean_length": 978.5, + "completions/mean_terminated_length": 978.5, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.3763440860215054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9645794268632315, + "kl": 0.017425537109375, + "learning_rate": 8.067221796548909e-07, + "loss": 0.0061, + "num_tokens": 67936390.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9736731052398682, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04846241888701107, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09554238811428035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1157.9375, + "completions/mean_terminated_length": 1157.9375, + "completions/min_length": 548.0, + "completions/min_terminated_length": 548.0, + "epoch": 0.3765941485371343, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1846604023959806, + "kl": 0.013702392578125, + "learning_rate": 8.063994677504574e-07, + "loss": -0.0964, + "num_tokens": 67990613.0, + "reward": 0.0, + "reward_std": 0.7812597751617432, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058717674941821445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0809720037723865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1499.6875, + "completions/mean_terminated_length": 1495.0, + "completions/min_length": 1495.0, + "completions/min_terminated_length": 1495.0, + "epoch": 0.37684421105276317, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.219973061899923, + "kl": 0.00788116455078125, + "learning_rate": 8.06076560478153e-07, + "loss": 0.0002, + "num_tokens": 68052928.0, + "reward": 0.0, + "reward_std": 0.7832177877426147, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04107543220520439, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09013381930997988, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1072.375, + "completions/mean_terminated_length": 1072.375, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.3770942735683921, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.020656569025146, + "kl": 0.01727294921875, + "learning_rate": 8.057534580840222e-07, + "loss": 0.0243, + "num_tokens": 68102998.0, + "reward": 0.0, + "reward_std": 0.6298606395721436, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16266136414647483, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24030500590478454, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1443.5625, + "completions/mean_terminated_length": 1349.5, + "completions/min_length": 1230.0, + "completions/min_terminated_length": 1230.0, + "epoch": 0.377344336084021, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7210732209466357, + "kl": 0.0102691650390625, + "learning_rate": 8.054301608142584e-07, + "loss": -0.0002, + "num_tokens": 68154103.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7782875299453735, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04103824189678752, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06251211161722262, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1303.0, + "completions/mean_terminated_length": 1149.77783203125, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.37759439859964994, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.220916145343941, + "kl": 0.0116729736328125, + "learning_rate": 8.051066689152032e-07, + "loss": -0.0115, + "num_tokens": 68203159.0, + "reward": 0.0, + "reward_std": 0.6027798652648926, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.47876945296107587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4093736363543239, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1293.1875, + "completions/mean_terminated_length": 1199.181884765625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.3778444611152788, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9154250606698953, + "kl": 0.0136871337890625, + "learning_rate": 8.047829826333468e-07, + "loss": -0.0117, + "num_tokens": 68258490.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5005489587783813, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030438389440754998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1275974043063942, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1114.5, + "completions/mean_terminated_length": 1114.5, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.37809452363090773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2041140171462636, + "kl": 0.0121002197265625, + "learning_rate": 8.044591022153271e-07, + "loss": 0.0029, + "num_tokens": 68299882.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6996527314186096, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05209552908714723, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11373225665105001, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1164.3125, + "completions/mean_terminated_length": 1011.727294921875, + "completions/min_length": 463.0, + "completions/min_terminated_length": 463.0, + "epoch": 0.37834458614653665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1115256033163563, + "kl": 0.0117034912109375, + "learning_rate": 8.041350279079308e-07, + "loss": 0.0226, + "num_tokens": 68362855.0, + "reward": 0.0, + "reward_std": 0.587918758392334, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2251504634017874, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31107009530510876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116195, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1297.25, + "completions/mean_terminated_length": 1268.2857666015625, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.3785946486621655, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5348280305910107, + "kl": 0.0103912353515625, + "learning_rate": 8.038107599580912e-07, + "loss": -0.0224, + "num_tokens": 68404931.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0149377584457397, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04420001348486697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06397232764043675, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0824396524513313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1350.875, + "completions/mean_terminated_length": 1261.4000244140625, + "completions/min_length": 994.0, + "completions/min_terminated_length": 994.0, + "epoch": 0.37884471117779445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8256563445060916, + "kl": 0.0097503662109375, + "learning_rate": 8.034862986128899e-07, + "loss": 0.0124, + "num_tokens": 68451569.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0613123178482056, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005891360754825023, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05032037060450219, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1193.0, + "completions/max_terminated_length": 1193.0, + "completions/mean_length": 867.375, + "completions/mean_terminated_length": 867.375, + "completions/min_length": 484.0, + "completions/min_terminated_length": 484.0, + "epoch": 0.37909477369342337, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.233790335307018, + "kl": 0.0152130126953125, + "learning_rate": 8.031616441195557e-07, + "loss": -0.0971, + "num_tokens": 68484887.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8392364382743835, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025828784867413017, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05233867602316305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1277.9375, + "completions/mean_terminated_length": 1144.7000732421875, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.37934483620905224, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0148452198909883, + "kl": 0.00974273681640625, + "learning_rate": 8.028367967254645e-07, + "loss": -0.0303, + "num_tokens": 68540030.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7211209535598755, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023370002906762055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0702720677660387, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1139.375, + "completions/mean_terminated_length": 1115.3333740234375, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.37959489872468116, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.452355036975796, + "kl": 0.00860595703125, + "learning_rate": 8.025117566781392e-07, + "loss": -0.0003, + "num_tokens": 68574188.0, + "reward": 0.0, + "reward_std": 0.6125326752662659, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008733415841786375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06260698132745086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1348.3125, + "completions/mean_terminated_length": 1196.625, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.3798449612403101, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8405081810792545, + "kl": 0.01214599609375, + "learning_rate": 8.021865242252494e-07, + "loss": -0.0045, + "num_tokens": 68631065.0, + "reward": 0.0, + "reward_std": 1.0255464315414429, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14792016423823884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08927107498849876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1023.125, + "completions/mean_terminated_length": 1023.125, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.380095023755939, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.636966712510466, + "kl": 0.01470947265625, + "learning_rate": 8.018610996146117e-07, + "loss": -0.0049, + "num_tokens": 68674395.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0430679321289062, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017253022908865063, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07571907068909264, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1280.6875, + "completions/mean_terminated_length": 1280.6875, + "completions/min_length": 1142.0, + "completions/min_terminated_length": 1142.0, + "epoch": 0.3803450862715679, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9709049959682816, + "kl": 0.00739288330078125, + "learning_rate": 8.015354830941886e-07, + "loss": -0.0138, + "num_tokens": 68719950.0, + "reward": 0.0, + "reward_std": 0.46789103746414185, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.061114991257755706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16249721982762558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14605934866804432, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1088.75, + "completions/mean_terminated_length": 993.84619140625, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.3805951487871968, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.741369233140328, + "kl": 0.011932373046875, + "learning_rate": 8.01209674912089e-07, + "loss": 0.0481, + "num_tokens": 68760770.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5776246786117554, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.055223881022627715, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08383175045602988, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1104.0, + "completions/mean_terminated_length": 924.0, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.3808452113028257, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2226844165623802, + "kl": 0.014312744140625, + "learning_rate": 8.008836753165683e-07, + "loss": -0.0022, + "num_tokens": 68802690.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.010846734046936, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.037969359112717704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05612429218483967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125757, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1014.8125, + "completions/mean_terminated_length": 982.4667358398438, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.3810952738184546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3873992733937848, + "kl": 0.011505126953125, + "learning_rate": 8.005574845560269e-07, + "loss": 0.0886, + "num_tokens": 68836327.0, + "reward": 0.0, + "reward_std": 1.0215191841125488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05771249916112181, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1099367143688324, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1029.4375, + "completions/mean_terminated_length": 998.0667114257812, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.3813453363340835, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4792130337615075, + "kl": 0.013092041015625, + "learning_rate": 8.002311028790117e-07, + "loss": -0.0507, + "num_tokens": 68880734.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.674964189529419, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046363424061961624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11199647127879539, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1114.1875, + "completions/mean_terminated_length": 1088.4666748046875, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.38159539884971244, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.984765399606739, + "kl": 0.019134521484375, + "learning_rate": 7.999045305342142e-07, + "loss": -0.036, + "num_tokens": 68920697.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0495131015777588, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10537732988701976, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06698392248922437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1147.5, + "completions/mean_terminated_length": 1147.5, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.3818454613653413, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.269037563583943, + "kl": 0.0169219970703125, + "learning_rate": 7.995777677704721e-07, + "loss": -0.0178, + "num_tokens": 68969953.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0467830896377563, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09205020756166254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0762568950576228, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15533714826025882, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1265.375, + "completions/mean_terminated_length": 1249.7333984375, + "completions/min_length": 1100.0, + "completions/min_terminated_length": 1100.0, + "epoch": 0.38209552388097023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3643228037284914, + "kl": 0.00876617431640625, + "learning_rate": 7.992508148367675e-07, + "loss": -0.0002, + "num_tokens": 69014207.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9657981395721436, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010443583321717128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1179563406087079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1180.625, + "completions/mean_terminated_length": 1135.0, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.38234558639659916, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6007287691951193, + "kl": 0.0118408203125, + "learning_rate": 7.989236719822281e-07, + "loss": -0.0422, + "num_tokens": 69048865.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8488974571228027, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22368193448282853, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27347455219068484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1320.3125, + "completions/mean_terminated_length": 1140.625, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.3825956489122281, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.474841421770269, + "kl": 0.0150146484375, + "learning_rate": 7.985963394561255e-07, + "loss": 0.0365, + "num_tokens": 69111750.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.35294660925865173, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036227931332993873, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048291867922819795, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16187558093703852, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1175.0, + "completions/mean_terminated_length": 1066.666748046875, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.38284571142785695, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.482022334177577, + "kl": 0.0078582763671875, + "learning_rate": 7.982688175078761e-07, + "loss": -0.0044, + "num_tokens": 69165070.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8644785284996033, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0934946438716615, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11061374341676947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1313.75, + "completions/mean_terminated_length": 1202.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.3830957739434859, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8953010219268456, + "kl": 0.009063720703125, + "learning_rate": 7.97941106387041e-07, + "loss": 0.0472, + "num_tokens": 69213842.0, + "reward": 0.0, + "reward_std": 0.22906708717346191, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009169768265782013, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.011061276026806524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057181, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1273.125, + "completions/mean_terminated_length": 1240.71435546875, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.3833458364591148, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9088864320531216, + "kl": 0.0159149169921875, + "learning_rate": 7.97613206343325e-07, + "loss": 0.0012, + "num_tokens": 69263156.0, + "reward": 0.0, + "reward_std": 0.8067635297775269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040168063642319625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03867098459769607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1423.1875, + "completions/mean_terminated_length": 1388.2728271484375, + "completions/min_length": 1262.0, + "completions/min_terminated_length": 1262.0, + "epoch": 0.38359589897474367, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7880308629697426, + "kl": 0.0056095123291015625, + "learning_rate": 7.972851176265774e-07, + "loss": -0.0234, + "num_tokens": 69314087.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.613574743270874, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03644807799150852, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0506480468998148, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1157.0, + "completions/mean_length": 1132.0, + "completions/mean_terminated_length": 964.727294921875, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.3838459614903726, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.387119615683856, + "kl": 0.01026153564453125, + "learning_rate": 7.969568404867902e-07, + "loss": -0.0402, + "num_tokens": 69360823.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6649572849273682, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020197060831179486, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09495606693392188, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1013.125, + "completions/mean_terminated_length": 900.769287109375, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.3840960240060015, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7966842760367943, + "kl": 0.009765625, + "learning_rate": 7.966283751741005e-07, + "loss": 0.0481, + "num_tokens": 69393145.0, + "reward": 0.0, + "reward_std": 0.9594006538391113, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12282931514932709, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16024511767457397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1215.4375, + "completions/mean_terminated_length": 1120.5833740234375, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.3843460865216304, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6977718213200035, + "kl": 0.0109710693359375, + "learning_rate": 7.962997219387873e-07, + "loss": -0.0326, + "num_tokens": 69441416.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0026373863220215, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11610517229416792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08976505809781637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518176, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1405.0625, + "completions/mean_terminated_length": 1348.0999755859375, + "completions/min_length": 1045.0, + "completions/min_terminated_length": 1045.0, + "epoch": 0.3845961490372593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.009485770158174, + "kl": 0.014495849609375, + "learning_rate": 7.959708810312737e-07, + "loss": 0.005, + "num_tokens": 69500905.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0688321590423584, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00013543747776446641, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05202515846693297, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 969.75, + "completions/mean_terminated_length": 969.75, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.38484621155288823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.173967799863722, + "kl": 0.0096588134765625, + "learning_rate": 7.956418527021257e-07, + "loss": -0.0522, + "num_tokens": 69530389.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6874761581420898, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052862471094238146, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05833966618142554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1390.1875, + "completions/mean_terminated_length": 1280.375, + "completions/min_length": 1185.0, + "completions/min_terminated_length": 1185.0, + "epoch": 0.38509627406851715, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6758479424321364, + "kl": 0.0139923095703125, + "learning_rate": 7.953126372020516e-07, + "loss": -0.0082, + "num_tokens": 69586448.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.006786584854126, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02569992624442275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1037338175582625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 995.25, + "completions/mean_terminated_length": 995.25, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.385346336584146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0599804065271545, + "kl": 0.01171875, + "learning_rate": 7.949832347819028e-07, + "loss": -0.0303, + "num_tokens": 69627700.0, + "reward": 0.0, + "reward_std": 0.9018995761871338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0665276327996565, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1055.8125, + "completions/mean_terminated_length": 992.357177734375, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.38559639909977494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.199370137791998, + "kl": 0.0124664306640625, + "learning_rate": 7.946536456926731e-07, + "loss": 0.0016, + "num_tokens": 69663617.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0383566617965698, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03807979016738928, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061591024174049436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1288.9375, + "completions/mean_terminated_length": 1258.7857666015625, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.38584646161540387, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1161355152481645, + "kl": 0.01483154296875, + "learning_rate": 7.943238701854984e-07, + "loss": -0.0379, + "num_tokens": 69703936.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6780154705047607, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11205105647992485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16436134712975298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1299572579307862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1161.625, + "completions/mean_terminated_length": 1161.625, + "completions/min_length": 1008.0, + "completions/min_terminated_length": 1008.0, + "epoch": 0.38609652413103274, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.930114710587476, + "kl": 0.01220703125, + "learning_rate": 7.939939085116566e-07, + "loss": -0.0187, + "num_tokens": 69753666.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0255470275878906, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029404461036513972, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054763856082638805, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.20073937405575493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1154.0, + "completions/mean_length": 1179.3125, + "completions/mean_terminated_length": 929.888916015625, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.38634658664666166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.945045404056169, + "kl": 0.010833740234375, + "learning_rate": 7.936637609225676e-07, + "loss": -0.0101, + "num_tokens": 69796215.0, + "reward": 0.0, + "reward_std": 0.78216952085495, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09829492112493629, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10612996137720844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1292.125, + "completions/mean_terminated_length": 1244.1539306640625, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.3865966491622906, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.977782636266777, + "kl": 0.0125579833984375, + "learning_rate": 7.933334276697927e-07, + "loss": -0.0206, + "num_tokens": 69854097.0, + "reward": 0.0, + "reward_std": 0.6692916750907898, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0240093613911875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05450274276972827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13326387079497304, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1278.0, + "completions/max_terminated_length": 1278.0, + "completions/mean_length": 1009.25, + "completions/mean_terminated_length": 1009.25, + "completions/min_length": 500.0, + "completions/min_terminated_length": 500.0, + "epoch": 0.38684671167791945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.198173858390861, + "kl": 0.0142669677734375, + "learning_rate": 7.930029090050351e-07, + "loss": -0.0277, + "num_tokens": 69896157.0, + "reward": 0.0, + "reward_std": 1.029550313949585, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0036785824963070602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08806846155803835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09259629622222522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1288.625, + "completions/mean_terminated_length": 1258.4285888671875, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.3870967741935484, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2501062512484933, + "kl": 0.0126190185546875, + "learning_rate": 7.92672205180139e-07, + "loss": 0.0204, + "num_tokens": 69945391.0, + "reward": 0.0, + "reward_std": 0.9966356158256531, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07466540805234946, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11391926181804711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1105.5625, + "completions/mean_terminated_length": 1079.2667236328125, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.3873468367091773, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6380615767284716, + "kl": 0.0097198486328125, + "learning_rate": 7.923413164470896e-07, + "loss": -0.0058, + "num_tokens": 69989736.0, + "reward": 0.0, + "reward_std": 0.40065422654151917, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00609191695273857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16562476083474484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14605934866804432, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1042.0, + "completions/max_terminated_length": 1042.0, + "completions/mean_length": 824.6875, + "completions/mean_terminated_length": 824.6875, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.3875968992248062, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7346455193605936, + "kl": 0.01556396484375, + "learning_rate": 7.920102430580133e-07, + "loss": -0.0416, + "num_tokens": 70023699.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9037766456604004, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028221555186371625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09381445001420638, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1018.75, + "completions/mean_terminated_length": 1018.75, + "completions/min_length": 515.0, + "completions/min_terminated_length": 515.0, + "epoch": 0.3878469617404351, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5750471349181403, + "kl": 0.0141448974609375, + "learning_rate": 7.916789852651767e-07, + "loss": -0.008, + "num_tokens": 70054279.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9920423626899719, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04766313880161329, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08574732282395216, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05561108336107646, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1242.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 1098.1875, + "completions/mean_terminated_length": 1098.1875, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.388097024256064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1075116618035756, + "kl": 0.0164031982421875, + "learning_rate": 7.913475433209874e-07, + "loss": -0.0234, + "num_tokens": 70098906.0, + "reward": 0.0, + "reward_std": 0.6207641363143921, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004853839089055521, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11177442018108075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1040.625, + "completions/mean_terminated_length": 1040.625, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.38834708677169294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.936115691541997, + "kl": 0.0156707763671875, + "learning_rate": 7.910159174779934e-07, + "loss": -0.0185, + "num_tokens": 70136548.0, + "reward": 0.0, + "reward_std": 0.6384569406509399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08812024515489794, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18316509088406005, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1086.6875, + "completions/mean_terminated_length": 1027.6429443359375, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.3885971492873218, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8116405473645774, + "kl": 0.0131988525390625, + "learning_rate": 7.906841079888821e-07, + "loss": -0.0125, + "num_tokens": 70183599.0, + "reward": 0.0, + "reward_std": 0.9105096459388733, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028372282886751463, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11741436584421612, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1112.375, + "completions/mean_terminated_length": 1112.375, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.38884721180295073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.696143754498351, + "kl": 0.020904541015625, + "learning_rate": 7.903521151064816e-07, + "loss": -0.005, + "num_tokens": 70229701.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9365147948265076, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10839634668040778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08008422290901122, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1111.25, + "completions/mean_terminated_length": 1085.3333740234375, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.38909727431857966, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4607393952766894, + "kl": 0.00995635986328125, + "learning_rate": 7.900199390837595e-07, + "loss": 0.0303, + "num_tokens": 70281969.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8985007405281067, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007615319979992872, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.017132075950034763, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0469436226095058, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1070.1875, + "completions/mean_terminated_length": 1070.1875, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.3893473368342086, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.192119326204611, + "kl": 0.0155792236328125, + "learning_rate": 7.896875801738226e-07, + "loss": 0.0186, + "num_tokens": 70317180.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9963094592094421, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.053827606693544305, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031047116596190152, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1151.375, + "completions/mean_terminated_length": 1035.166748046875, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.38959739934983745, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0605362282702635, + "kl": 0.01016998291015625, + "learning_rate": 7.893550386299176e-07, + "loss": 0.0237, + "num_tokens": 70360970.0, + "reward": 0.0, + "reward_std": 0.822307825088501, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.049137286556452456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09281919158475511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1147.8125, + "completions/mean_terminated_length": 1124.3333740234375, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.38984746186546637, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4119132243417427, + "kl": 0.01025390625, + "learning_rate": 7.890223147054298e-07, + "loss": 0.0078, + "num_tokens": 70400391.0, + "reward": 0.0, + "reward_std": 0.9648492336273193, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07375646546185156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15583610606037815, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1157.4375, + "completions/mean_terminated_length": 1134.60009765625, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.3900975243810953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.046271765818246, + "kl": 0.0121612548828125, + "learning_rate": 7.88689408653884e-07, + "loss": 0.0285, + "num_tokens": 70445502.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9690751433372498, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008333145453195048, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047601116755927264, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1375.0625, + "completions/mean_terminated_length": 1277.888916015625, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.39034758689672416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.086545784830947, + "kl": 0.0124359130859375, + "learning_rate": 7.883563207289437e-07, + "loss": -0.0024, + "num_tokens": 70492791.0, + "reward": 0.0, + "reward_std": 0.9935873746871948, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031995489879943356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0420250342276332, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1753303759784389, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1146.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 853.3125, + "completions/mean_terminated_length": 853.3125, + "completions/min_length": 480.0, + "completions/min_terminated_length": 480.0, + "epoch": 0.3905976494123531, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.307046977644347, + "kl": 0.0166778564453125, + "learning_rate": 7.880230511844105e-07, + "loss": -0.0231, + "num_tokens": 70541252.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8680849671363831, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14503922538863734, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16543083069708803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1028.8125, + "completions/mean_terminated_length": 1028.8125, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.390847711927982, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8042469320406274, + "kl": 0.0125732421875, + "learning_rate": 7.876896002742251e-07, + "loss": 0.0414, + "num_tokens": 70581273.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9417210221290588, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03321305489464099, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08662662736544528, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1202.6875, + "completions/mean_terminated_length": 1103.5833740234375, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.3910977744436109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9122882942469808, + "kl": 0.0093536376953125, + "learning_rate": 7.873559682524654e-07, + "loss": -0.0234, + "num_tokens": 70634964.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9018257856369019, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08823758406410961, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0912251235146049, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1151.8125, + "completions/mean_terminated_length": 1151.8125, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.3913478369592398, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9045083917665608, + "kl": 0.010772705078125, + "learning_rate": 7.870221553733486e-07, + "loss": -0.0082, + "num_tokens": 70679153.0, + "reward": 0.0, + "reward_std": 0.8418854475021362, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030561426656844203, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08745259261869971, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1203.0625, + "completions/mean_terminated_length": 1183.2667236328125, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.3915978994748687, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4758013109255312, + "kl": 0.00946044921875, + "learning_rate": 7.866881618912285e-07, + "loss": -0.0564, + "num_tokens": 70730970.0, + "reward": 0.0, + "reward_std": 0.8549232482910156, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005622476234456599, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027653018778855343, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1344398529978149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1370.125, + "completions/mean_terminated_length": 1292.2000732421875, + "completions/min_length": 1078.0, + "completions/min_terminated_length": 1078.0, + "epoch": 0.39184796199049765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6684323451995815, + "kl": 0.0167388916015625, + "learning_rate": 7.863539880605974e-07, + "loss": -0.0141, + "num_tokens": 70794788.0, + "reward": 0.0, + "reward_std": 0.9611808061599731, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09102662975660893, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11118581803670373, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1484.5, + "completions/mean_terminated_length": 1438.0, + "completions/min_length": 1338.0, + "completions/min_terminated_length": 1338.0, + "epoch": 0.3920980245061265, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.703966536812483, + "kl": 0.0115814208984375, + "learning_rate": 7.860196341360841e-07, + "loss": -0.0029, + "num_tokens": 70862668.0, + "reward": 0.0, + "reward_std": 0.8686152696609497, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0648795689206637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07343537577626648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1191.3125, + "completions/mean_terminated_length": 1191.3125, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.39234808702175544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.711262626810369, + "kl": 0.01800537109375, + "learning_rate": 7.856851003724558e-07, + "loss": 0.0098, + "num_tokens": 70912801.0, + "reward": 0.0, + "reward_std": 0.9018560647964478, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007093282744042525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09857253621833208, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.23349200953891017, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1389.6875, + "completions/mean_terminated_length": 1205.8333740234375, + "completions/min_length": 303.0, + "completions/min_terminated_length": 303.0, + "epoch": 0.39259814953738437, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.500379829544296, + "kl": 0.0095977783203125, + "learning_rate": 7.853503870246159e-07, + "loss": 0.1228, + "num_tokens": 70968084.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8541646003723145, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013089122755529323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12484871593296057, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1081.5, + "completions/mean_terminated_length": 1021.71435546875, + "completions/min_length": 556.0, + "completions/min_terminated_length": 556.0, + "epoch": 0.39284821205301323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.318642276928274, + "kl": 0.0155181884765625, + "learning_rate": 7.850154943476047e-07, + "loss": -0.0488, + "num_tokens": 71007244.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0132873058319092, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012906011085720415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06140395009197726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1240.9375, + "completions/mean_terminated_length": 1203.9285888671875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.39309827456864216, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.621190842827133, + "kl": 0.01467132568359375, + "learning_rate": 7.846804225965995e-07, + "loss": -0.0005, + "num_tokens": 71055003.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0427652597427368, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07331346484209832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08952765572661192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1152.6875, + "completions/mean_terminated_length": 1129.533447265625, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.3933483370842711, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.137343394513244, + "kl": 0.013397216796875, + "learning_rate": 7.843451720269135e-07, + "loss": -0.0077, + "num_tokens": 71096358.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0010414123535156, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013009832747679318, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0870719951090613, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1083.0625, + "completions/mean_terminated_length": 1083.0625, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.39359839959989995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.755114198954612, + "kl": 0.015350341796875, + "learning_rate": 7.840097428939969e-07, + "loss": -0.0573, + "num_tokens": 71142655.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9134328961372375, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015127038515126964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07236254396450188, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 905.8125, + "completions/mean_terminated_length": 905.8125, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.3938484621155289, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.616438532262193, + "kl": 0.0166015625, + "learning_rate": 7.836741354534353e-07, + "loss": -0.0794, + "num_tokens": 71172572.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0148518085479736, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0008318433785490425, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027512442365966134, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1091.4375, + "completions/mean_terminated_length": 1064.2000732421875, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.3940985246311578, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.060549715111791, + "kl": 0.0108795166015625, + "learning_rate": 7.833383499609505e-07, + "loss": 0.0, + "num_tokens": 71211987.0, + "reward": 0.0, + "reward_std": 0.9054416418075562, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04890571986567938, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06044507198203013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1093.875, + "completions/mean_terminated_length": 1066.800048828125, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.3943485871467867, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.584426977678065, + "kl": 0.016448974609375, + "learning_rate": 7.830023866723997e-07, + "loss": 0.0144, + "num_tokens": 71248041.0, + "reward": 0.0, + "reward_std": 0.8814120292663574, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1690216265304657, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13801387999343298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1006.5625, + "completions/mean_terminated_length": 1006.5625, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.3945986496624156, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4501965777809125, + "kl": 0.01690673828125, + "learning_rate": 7.826662458437762e-07, + "loss": 0.0026, + "num_tokens": 71298810.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8568593263626099, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019375580601435818, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03960883699004363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 990.0, + "completions/mean_length": 1079.5, + "completions/mean_terminated_length": 659.0, + "completions/min_length": 501.0, + "completions/min_terminated_length": 501.0, + "epoch": 0.3948487121780445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.468586600942178, + "kl": 0.008331298828125, + "learning_rate": 7.823299277312077e-07, + "loss": -0.058, + "num_tokens": 71340874.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0263588428497314, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08439596666285983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09067685988695291, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1206.8125, + "completions/mean_terminated_length": 1109.0833740234375, + "completions/min_length": 414.0, + "completions/min_terminated_length": 414.0, + "epoch": 0.39509877469367344, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8803416373573323, + "kl": 0.0119476318359375, + "learning_rate": 7.819934325909575e-07, + "loss": 0.0743, + "num_tokens": 71391431.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.36634689569473267, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047075354135333244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10634502948100887, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1023.625, + "completions/mean_terminated_length": 1023.625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.3953488372093023, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.266789910511784, + "kl": 0.021697998046875, + "learning_rate": 7.816567606794239e-07, + "loss": -0.0181, + "num_tokens": 71448817.0, + "reward": 0.0, + "reward_std": 0.940520167350769, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018670168034805156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08201213942629866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333337, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1136.75, + "completions/mean_terminated_length": 971.6364135742188, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.39559889972493123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5881883717617407, + "kl": 0.0191192626953125, + "learning_rate": 7.813199122531395e-07, + "loss": -0.1049, + "num_tokens": 71492789.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9783716201782227, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044643699278968905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04258182709884232, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1294.875, + "completions/mean_terminated_length": 1201.6363525390625, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.39584896224056015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7852347575261533, + "kl": 0.0149383544921875, + "learning_rate": 7.809828875687715e-07, + "loss": -0.0174, + "num_tokens": 71554171.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0556868314743042, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09011524655223928, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.080002156672278, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1201.0, + "completions/mean_terminated_length": 1158.2857666015625, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.396099024756189, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7066978106428317, + "kl": 0.017364501953125, + "learning_rate": 7.806456868831222e-07, + "loss": 0.0156, + "num_tokens": 71602835.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8677076101303101, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05760585399540517, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09770193770470086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1325.3125, + "completions/mean_terminated_length": 1285.0, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.39634908727181795, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0357093344650283, + "kl": 0.0122528076171875, + "learning_rate": 7.803083104531264e-07, + "loss": -0.0144, + "num_tokens": 71649384.0, + "reward": 0.0, + "reward_std": 0.9185574650764465, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09856370749971975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.180412817814222, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1335.4375, + "completions/mean_terminated_length": 1260.6363525390625, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.39659914978744687, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.831372478212745, + "kl": 0.0124664306640625, + "learning_rate": 7.799707585358542e-07, + "loss": 0.024, + "num_tokens": 71691855.0, + "reward": 0.0, + "reward_std": 0.6088191270828247, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04409484953073255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12056376130349733, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1206.0625, + "completions/mean_terminated_length": 1186.4666748046875, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.3968492123030758, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3966978890910626, + "kl": 0.0153045654296875, + "learning_rate": 7.796330313885089e-07, + "loss": -0.0237, + "num_tokens": 71734968.0, + "reward": 0.0, + "reward_std": 1.0196411609649658, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04411303523825525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07017076571658123, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1365.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1104.125, + "completions/mean_terminated_length": 1104.125, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.39709927481870466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.73829921992881, + "kl": 0.00870513916015625, + "learning_rate": 7.792951292684273e-07, + "loss": -0.0025, + "num_tokens": 71780410.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.764035701751709, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018283971807105615, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11891485032187667, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1131.6875, + "completions/mean_terminated_length": 1046.6923828125, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.3973493373343336, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8145997477364975, + "kl": 0.01132965087890625, + "learning_rate": 7.789570524330796e-07, + "loss": -0.0394, + "num_tokens": 71823829.0, + "reward": 0.0, + "reward_std": 0.9908758401870728, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058373173685013484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1790824555076345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1341.5, + "completions/mean_terminated_length": 1218.2222900390625, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.3975993998499625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1959848345499697, + "kl": 0.0117950439453125, + "learning_rate": 7.786188011400694e-07, + "loss": -0.0007, + "num_tokens": 71873005.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9784872531890869, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01726255279336918, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1054995320558551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04194352464039305, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1208.5, + "completions/mean_terminated_length": 1189.0667724609375, + "completions/min_length": 316.0, + "completions/min_terminated_length": 316.0, + "epoch": 0.3978494623655914, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.020463345961487, + "kl": 0.0120849609375, + "learning_rate": 7.782803756471323e-07, + "loss": -0.053, + "num_tokens": 71922405.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8330576419830322, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1575313303855437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0863943379014422, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1042.5625, + "completions/mean_terminated_length": 1042.5625, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.3980995248812203, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2065951890807614, + "kl": 0.01409912109375, + "learning_rate": 7.779417762121378e-07, + "loss": -0.0058, + "num_tokens": 71959190.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0026516914367676, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05656119969265448, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06503229516539986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1158.375, + "completions/mean_terminated_length": 1135.60009765625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.3983495873968492, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.52239167200604, + "kl": 0.0111846923828125, + "learning_rate": 7.776030030930875e-07, + "loss": -0.0049, + "num_tokens": 71999156.0, + "reward": 0.0, + "reward_std": 0.9251689910888672, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05694893771643508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09856572021607832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1020.0, + "completions/mean_length": 1111.5625, + "completions/mean_terminated_length": 878.5, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.3985996499124781, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.395878140781291, + "kl": 0.008697509765625, + "learning_rate": 7.772640565481146e-07, + "loss": 0.0156, + "num_tokens": 72045845.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7168657779693604, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023667382384246816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10852735380327623, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17018508443151817, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1039.6875, + "completions/mean_terminated_length": 1009.0000610351562, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.398849712428107, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.283984160466364, + "kl": 0.0201416015625, + "learning_rate": 7.769249368354855e-07, + "loss": 0.0, + "num_tokens": 72084424.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6014885902404785, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3997231047759588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.39397828625837045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1261.75, + "completions/mean_terminated_length": 1118.800048828125, + "completions/min_length": 345.0, + "completions/min_terminated_length": 345.0, + "epoch": 0.39909977494373594, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5466513239822954, + "kl": 0.0092926025390625, + "learning_rate": 7.765856442135984e-07, + "loss": -0.0165, + "num_tokens": 72132516.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7729835510253906, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005695989416098663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01629243114843496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1338.3125, + "completions/mean_terminated_length": 1264.8182373046875, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.39934983745936486, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9035975580013975, + "kl": 0.0121612548828125, + "learning_rate": 7.762461789409827e-07, + "loss": 0.0055, + "num_tokens": 72179001.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0503668785095215, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031814962408772636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.040463803007402925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1068.875, + "completions/mean_terminated_length": 1007.2857666015625, + "completions/min_length": 443.0, + "completions/min_terminated_length": 443.0, + "epoch": 0.39959989997499373, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1143314649864897, + "kl": 0.01458740234375, + "learning_rate": 7.759065412762998e-07, + "loss": 0.0122, + "num_tokens": 72212767.0, + "reward": 0.0, + "reward_std": 0.7790982723236084, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1886551629304689, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3421823672065537, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1341.75, + "completions/mean_terminated_length": 1246.800048828125, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.39984996249062266, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1486964806391273, + "kl": 0.00951385498046875, + "learning_rate": 7.755667314783419e-07, + "loss": -0.0226, + "num_tokens": 72264787.0, + "reward": 0.0, + "reward_std": 0.8932489156723022, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018822905381403222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057930717343051545, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505564, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1125.125, + "completions/mean_terminated_length": 1100.1334228515625, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.4001000250062516, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1258693213515096, + "kl": 0.00836181640625, + "learning_rate": 7.752267498060333e-07, + "loss": 0.0023, + "num_tokens": 72305005.0, + "reward": 0.0, + "reward_std": 0.8811789155006409, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.001133280069798093, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08667918487699058, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0665276327996565, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1142.5, + "completions/mean_terminated_length": 1091.4285888671875, + "completions/min_length": 291.0, + "completions/min_terminated_length": 291.0, + "epoch": 0.40035008752188045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6742493315243165, + "kl": 0.0128936767578125, + "learning_rate": 7.748865965184285e-07, + "loss": -0.0544, + "num_tokens": 72354845.0, + "reward": 0.0, + "reward_std": 0.711220383644104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010580870011326625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07731067068730048, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1325.75, + "completions/mean_terminated_length": 1246.5455322265625, + "completions/min_length": 1105.0, + "completions/min_terminated_length": 1105.0, + "epoch": 0.4006001500375094, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7168299099197, + "kl": 0.011260986328125, + "learning_rate": 7.745462718747131e-07, + "loss": -0.0246, + "num_tokens": 72400073.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9625579118728638, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08864440967060773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06650450104760552, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1408.875, + "completions/mean_terminated_length": 1208.4000244140625, + "completions/min_length": 1088.0, + "completions/min_terminated_length": 1088.0, + "epoch": 0.4008502125531383, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2460653894943885, + "kl": 0.018768310546875, + "learning_rate": 7.742057761342029e-07, + "loss": -0.0219, + "num_tokens": 72455455.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7645143270492554, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11106957015020276, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15668735868068936, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1425.25, + "completions/mean_terminated_length": 1350.5, + "completions/min_length": 1179.0, + "completions/min_terminated_length": 1179.0, + "epoch": 0.40110027506876716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.675294792025924, + "kl": 0.0183258056640625, + "learning_rate": 7.738651095563448e-07, + "loss": 0.0192, + "num_tokens": 72512195.0, + "reward": 0.0, + "reward_std": 0.9487957954406738, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028281162022312012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09524120807987907, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1117.9375, + "completions/mean_terminated_length": 1092.4666748046875, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.4013503375843961, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.306976811444816, + "kl": 0.0150299072265625, + "learning_rate": 7.735242724007148e-07, + "loss": 0.0224, + "num_tokens": 72545090.0, + "reward": 0.0, + "reward_std": 0.7310828566551208, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11354157706154419, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13891574262767725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 971.25, + "completions/mean_terminated_length": 971.25, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.401600400100025, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.295486606262917, + "kl": 0.0129241943359375, + "learning_rate": 7.7318326492702e-07, + "loss": -0.0458, + "num_tokens": 72577662.0, + "reward": 0.0, + "reward_std": 0.4759182631969452, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0016497416421158775, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06621072882045455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1168.625, + "completions/mean_terminated_length": 910.888916015625, + "completions/min_length": 565.0, + "completions/min_terminated_length": 565.0, + "epoch": 0.40185046261565394, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.896522704779701, + "kl": 0.01080322265625, + "learning_rate": 7.728420873950965e-07, + "loss": 0.0041, + "num_tokens": 72638056.0, + "reward": 0.0, + "reward_std": 0.8267627358436584, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05552999903773472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12237375627578954, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1233.6875, + "completions/mean_terminated_length": 1026.5555419921875, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.4021005251312828, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.074687452288223, + "kl": 0.0140838623046875, + "learning_rate": 7.725007400649103e-07, + "loss": 0.0112, + "num_tokens": 72679819.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7221042513847351, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16676878523628724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13177360804402025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13662601021279466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1335.8125, + "completions/mean_terminated_length": 1237.300048828125, + "completions/min_length": 1046.0, + "completions/min_terminated_length": 1046.0, + "epoch": 0.40235058764691173, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8124132875993677, + "kl": 0.011444091796875, + "learning_rate": 7.721592231965568e-07, + "loss": -0.0229, + "num_tokens": 72727808.0, + "reward": 0.0, + "reward_std": 0.9603712558746338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0469031245318257, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20362905629956465, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1196.0, + "completions/mean_terminated_length": 1175.7333984375, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.40260065016254065, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9015377691528084, + "kl": 0.0122528076171875, + "learning_rate": 7.718175370502603e-07, + "loss": -0.02, + "num_tokens": 72773160.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7851569056510925, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01452829791520522, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09918447302825152, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1244.1875, + "completions/mean_terminated_length": 1185.1539306640625, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.4028507126781695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2759855412818877, + "kl": 0.017852783203125, + "learning_rate": 7.714756818863745e-07, + "loss": -0.0017, + "num_tokens": 72833875.0, + "reward": 0.0, + "reward_std": 0.5694240927696228, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08120672597031105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3230552542641551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1064.375, + "completions/mean_terminated_length": 1002.1428833007812, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.40310077519379844, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2945297703583205, + "kl": 0.0123443603515625, + "learning_rate": 7.711336579653816e-07, + "loss": -0.1247, + "num_tokens": 72881225.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9023599624633789, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06658079067126386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2518764735882452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1250.75, + "completions/mean_terminated_length": 1167.666748046875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.40335083770942737, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6589065591028653, + "kl": 0.008148193359375, + "learning_rate": 7.707914655478924e-07, + "loss": 0.0027, + "num_tokens": 72924125.0, + "reward": 0.0, + "reward_std": 0.5202261209487915, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0019246421644610306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03693291724465334, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1209.375, + "completions/mean_terminated_length": 1190.0001220703125, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.4036009002250563, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.076164234669731, + "kl": 0.0133514404296875, + "learning_rate": 7.70449104894646e-07, + "loss": 0.0244, + "num_tokens": 72974691.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7803144454956055, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04921647448904596, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05419067610722199, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1088.875, + "completions/mean_terminated_length": 1088.875, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.40385096274068516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1226291471848264, + "kl": 0.01593017578125, + "learning_rate": 7.701065762665103e-07, + "loss": -0.041, + "num_tokens": 73017785.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7884033918380737, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.036126834328734064, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12840425040197548, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1077.8125, + "completions/mean_terminated_length": 1077.8125, + "completions/min_length": 593.0, + "completions/min_terminated_length": 593.0, + "epoch": 0.4041010252563141, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.479185169530575, + "kl": 0.01214599609375, + "learning_rate": 7.697638799244803e-07, + "loss": -0.0352, + "num_tokens": 73060238.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5089660286903381, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019350933315299114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04009050234416078, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1144.0, + "completions/mean_length": 1119.5, + "completions/mean_terminated_length": 992.6666870117188, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.404351087771943, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.279077974782196, + "kl": 0.0139923095703125, + "learning_rate": 7.694210161296795e-07, + "loss": -0.0597, + "num_tokens": 73099854.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9805716276168823, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0027581392051031026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09939061050260432, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1304.3125, + "completions/mean_terminated_length": 1259.1539306640625, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.4046011502875719, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9718873295084545, + "kl": 0.0129241943359375, + "learning_rate": 7.690779851433585e-07, + "loss": -0.0164, + "num_tokens": 73151011.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7308509349822998, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04238415572511402, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07122678698793564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 904.6875, + "completions/mean_terminated_length": 904.6875, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.4048512128032008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.104301575619807, + "kl": 0.00658416748046875, + "learning_rate": 7.687347872268956e-07, + "loss": -0.0724, + "num_tokens": 73200758.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9756397604942322, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010895066333022781, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11006983601659456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1351.6875, + "completions/mean_terminated_length": 1236.3333740234375, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.4051012753188297, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.851452291338438, + "kl": 0.012939453125, + "learning_rate": 7.683914226417962e-07, + "loss": -0.0223, + "num_tokens": 73246337.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0226210355758667, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06486921996816304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13943531254969008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1224.875, + "completions/mean_terminated_length": 1059.800048828125, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.4053513378344586, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.952785634628913, + "kl": 0.01123046875, + "learning_rate": 7.680478916496926e-07, + "loss": 0.0161, + "num_tokens": 73283159.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9983932971954346, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 976.5, + "completions/mean_terminated_length": 941.6000366210938, + "completions/min_length": 602.0, + "completions/min_terminated_length": 602.0, + "epoch": 0.4056014003500875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.567954210265031, + "kl": 0.016998291015625, + "learning_rate": 7.677041945123441e-07, + "loss": -0.0365, + "num_tokens": 73331415.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9122024774551392, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022543675897674156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06981030694660756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1082.0, + "completions/mean_terminated_length": 1022.2857666015625, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.40585146286571644, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.456948435358128, + "kl": 0.016693115234375, + "learning_rate": 7.673603314916366e-07, + "loss": -0.0377, + "num_tokens": 73374207.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0166778564453125, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.059769882222178, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10135788655614283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1158.0, + "completions/mean_terminated_length": 1079.076904296875, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.40610152538134536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1355389340977635, + "kl": 0.012359619140625, + "learning_rate": 7.670163028495821e-07, + "loss": 0.0556, + "num_tokens": 73416367.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7798465490341187, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003345619340249059, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03786138493773039, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1033.0, + "completions/max_terminated_length": 1033.0, + "completions/mean_length": 722.5625, + "completions/mean_terminated_length": 722.5625, + "completions/min_length": 223.0, + "completions/min_terminated_length": 223.0, + "epoch": 0.40635158789697423, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.814014170971254, + "kl": 0.014373779296875, + "learning_rate": 7.66672108848319e-07, + "loss": -0.0216, + "num_tokens": 73447536.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0300780534744263, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0730277292892191, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0719774492438926, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1187.25, + "completions/mean_terminated_length": 1187.25, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.40660165041260315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.293630302577568, + "kl": 0.0158843994140625, + "learning_rate": 7.66327749750112e-07, + "loss": -0.0128, + "num_tokens": 73494964.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.062612533569336, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1182905772304078, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07468533073299631, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1087.75, + "completions/mean_terminated_length": 1087.75, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.4068517129282321, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5474151356038344, + "kl": 0.0163116455078125, + "learning_rate": 7.659832258173507e-07, + "loss": 0.0341, + "num_tokens": 73545384.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9162627458572388, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03246916429546719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08759681810862804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1141.0, + "completions/max_terminated_length": 1141.0, + "completions/mean_length": 943.8125, + "completions/mean_terminated_length": 943.8125, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.40710177544386095, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9036353782921043, + "kl": 0.013946533203125, + "learning_rate": 7.656385373125515e-07, + "loss": -0.0018, + "num_tokens": 73577389.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9496052265167236, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08578677292075729, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09976725363373616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1316.1875, + "completions/mean_terminated_length": 1254.916748046875, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.40735183795948987, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.807051628793773, + "kl": 0.01275634765625, + "learning_rate": 7.65293684498355e-07, + "loss": 0.0129, + "num_tokens": 73635688.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.067591667175293, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02127420549506371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07230699767837566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1159.0, + "completions/mean_length": 1270.8125, + "completions/mean_terminated_length": 1041.625, + "completions/min_length": 1008.0, + "completions/min_terminated_length": 1008.0, + "epoch": 0.4076019004751188, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5823915999988554, + "kl": 0.01740264892578125, + "learning_rate": 7.64948667637528e-07, + "loss": 0.0263, + "num_tokens": 73688645.0, + "reward": 0.0, + "reward_std": 0.7360256314277649, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02115591679712968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.033699396894585544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1143.9375, + "completions/mean_terminated_length": 982.0909423828125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.40785196299074766, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.827237105479521, + "kl": 0.0104217529296875, + "learning_rate": 7.64603486992962e-07, + "loss": 0.0358, + "num_tokens": 73735508.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9667670130729675, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07316526900627612, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11538029413693852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1406.0, + "completions/mean_terminated_length": 1312.0, + "completions/min_length": 1154.0, + "completions/min_terminated_length": 1154.0, + "epoch": 0.4081020255063766, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4657422012238617, + "kl": 0.01263427734375, + "learning_rate": 7.642581428276727e-07, + "loss": -0.0026, + "num_tokens": 73785348.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9363234043121338, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058822653704051875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0728715490546228, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1051.3125, + "completions/mean_terminated_length": 1051.3125, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.4083520880220055, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2217058247171675, + "kl": 0.0149993896484375, + "learning_rate": 7.639126354048011e-07, + "loss": -0.0526, + "num_tokens": 73829209.0, + "reward": 0.0, + "reward_std": 1.0592005252838135, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024817492983190647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025631396538271147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1009.0, + "completions/max_terminated_length": 1009.0, + "completions/mean_length": 754.875, + "completions/mean_terminated_length": 754.875, + "completions/min_length": 492.0, + "completions/min_terminated_length": 492.0, + "epoch": 0.40860215053763443, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0507876631479833, + "kl": 0.00897979736328125, + "learning_rate": 7.635669649876126e-07, + "loss": -0.0347, + "num_tokens": 73855887.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9966955780982971, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.053247946341417704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20413989590417722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1296.5, + "completions/mean_terminated_length": 1093.0, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.4088522130532633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6832833785886225, + "kl": 0.009765625, + "learning_rate": 7.632211318394961e-07, + "loss": -0.0333, + "num_tokens": 73901975.0, + "reward": 0.0, + "reward_std": 1.031982660293579, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14959643556187055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14979976019522767, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1400.8125, + "completions/mean_terminated_length": 1323.6666259765625, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.4091022755688922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.134738236761002, + "kl": 0.016143798828125, + "learning_rate": 7.628751362239652e-07, + "loss": 0.0038, + "num_tokens": 73963636.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7798345685005188, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004975009904965902, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07724268433892245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1159.875, + "completions/mean_terminated_length": 1046.5, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.40935233808452115, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.376462371338996, + "kl": 0.0085601806640625, + "learning_rate": 7.625289784046573e-07, + "loss": -0.0291, + "num_tokens": 74016530.0, + "reward": 0.0, + "reward_std": 1.0067377090454102, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03120315190739062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10366104623824578, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1244.875, + "completions/mean_terminated_length": 1159.8333740234375, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.40960240060015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6600701207530135, + "kl": 0.0119171142578125, + "learning_rate": 7.621826586453327e-07, + "loss": -0.0727, + "num_tokens": 74062672.0, + "reward": 0.0, + "reward_std": 0.6177958846092224, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01139168323167249, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08323289015397711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1060.8125, + "completions/mean_terminated_length": 1031.533447265625, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.40985246311577894, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7901622549668503, + "kl": 0.0159912109375, + "learning_rate": 7.618361772098758e-07, + "loss": -0.0329, + "num_tokens": 74100669.0, + "reward": 0.0, + "reward_std": 0.4796094596385956, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04519165692556638, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05698263616973287, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1135.3125, + "completions/mean_terminated_length": 851.6666870117188, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.41010252563140787, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8667739959589453, + "kl": 0.01163482666015625, + "learning_rate": 7.61489534362294e-07, + "loss": -0.0274, + "num_tokens": 74146858.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9513424634933472, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006026133685409982, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.020546649158195436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 917.4375, + "completions/mean_terminated_length": 917.4375, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.41035258814703673, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.162713174364149, + "kl": 0.022796630859375, + "learning_rate": 7.611427303667174e-07, + "loss": -0.0284, + "num_tokens": 74201273.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0683602094650269, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05593567776241068, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12628482300330102, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1289.625, + "completions/mean_terminated_length": 1219.5, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.41060265066266566, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1357161562193223, + "kl": 0.010955810546875, + "learning_rate": 7.607957654873995e-07, + "loss": -0.055, + "num_tokens": 74253603.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.969018816947937, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04002385866049527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06435098221778776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1212.4375, + "completions/mean_terminated_length": 1171.357177734375, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.4108527131782946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.247477051437793, + "kl": 0.0109405517578125, + "learning_rate": 7.604486399887153e-07, + "loss": -0.0169, + "num_tokens": 74297986.0, + "reward": 0.0, + "reward_std": 1.0636203289031982, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08022777305188443, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18823903739333256, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1340.8125, + "completions/mean_terminated_length": 1304.0770263671875, + "completions/min_length": 1100.0, + "completions/min_terminated_length": 1100.0, + "epoch": 0.4111027756939235, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9156677207387305, + "kl": 0.0136260986328125, + "learning_rate": 7.601013541351638e-07, + "loss": -0.0239, + "num_tokens": 74353703.0, + "reward": 0.0, + "reward_std": 0.8252875804901123, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07643871513085648, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12666144890915146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970786, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1351.1875, + "completions/mean_terminated_length": 1235.4444580078125, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.4113528382095524, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9466226712887824, + "kl": 0.0155487060546875, + "learning_rate": 7.597539081913645e-07, + "loss": 0.0212, + "num_tokens": 74410002.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8241056799888611, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07682182095894138, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.27726024644206687, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1343709624716425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1179.6875, + "completions/mean_terminated_length": 1133.9285888671875, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.4116029007251813, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6876923807090995, + "kl": 0.011810302734375, + "learning_rate": 7.594063024220601e-07, + "loss": -0.0344, + "num_tokens": 74451277.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8990912437438965, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042607206353925516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21220213698840068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1393.625, + "completions/mean_terminated_length": 1329.800048828125, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.4118529632408102, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.993247087676568, + "kl": 0.01470947265625, + "learning_rate": 7.590585370921144e-07, + "loss": -0.0408, + "num_tokens": 74507759.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0057283639907837, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04458750557757054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05797450443616805, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15396007178390023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1121.75, + "completions/mean_terminated_length": 1096.533447265625, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.4121030257564391, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2156578686348425, + "kl": 0.014923095703125, + "learning_rate": 7.587106124665131e-07, + "loss": -0.0292, + "num_tokens": 74557643.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0619440078735352, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07763841105836113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10345798638599232, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1181.1875, + "completions/mean_terminated_length": 1159.933349609375, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.412353088272068, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0163606873012108, + "kl": 0.0120086669921875, + "learning_rate": 7.583625288103635e-07, + "loss": -0.0329, + "num_tokens": 74603582.0, + "reward": 0.0, + "reward_std": 0.744328498840332, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05360552486808002, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20330778044297967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1185.1875, + "completions/mean_terminated_length": 1185.1875, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.41260315078769694, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1379389806383147, + "kl": 0.01373291015625, + "learning_rate": 7.580142863888933e-07, + "loss": 0.0441, + "num_tokens": 74652649.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0455820560455322, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.039705486424343336, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14242541114524265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1054.3125, + "completions/mean_terminated_length": 1054.3125, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.4128532133033258, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0539817842635424, + "kl": 0.0120697021484375, + "learning_rate": 7.57665885467452e-07, + "loss": 0.0074, + "num_tokens": 74708702.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7204165458679199, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07398051050339775, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.45725457589565227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1117.5, + "completions/mean_terminated_length": 990.0, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.41310327581895473, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.097164296829905, + "kl": 0.015716552734375, + "learning_rate": 7.573173263115092e-07, + "loss": 0.0107, + "num_tokens": 74758422.0, + "reward": 0.0, + "reward_std": 0.43053457140922546, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.062493730748263816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14377385476557067, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 908.5, + "completions/mean_terminated_length": 908.5, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.41335333833458365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0157182152699753, + "kl": 0.0160064697265625, + "learning_rate": 7.569686091866555e-07, + "loss": -0.0031, + "num_tokens": 74794182.0, + "reward": 0.0, + "reward_std": 0.8773343563079834, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12561777770815982, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17680613423384395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1153.625, + "completions/mean_terminated_length": 1104.1429443359375, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.4136034008502126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0011862326344025, + "kl": 0.0114898681640625, + "learning_rate": 7.56619734358602e-07, + "loss": -0.0076, + "num_tokens": 74840344.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0319846868515015, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029528456837645784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055817981951147455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1229.6875, + "completions/mean_terminated_length": 1211.666748046875, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.41385346336584145, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.522322711692423, + "kl": 0.0099945068359375, + "learning_rate": 7.562707020931795e-07, + "loss": -0.0257, + "num_tokens": 74886275.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7764508724212646, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013368301599249843, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05386684448302195, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 984.3125, + "completions/mean_terminated_length": 984.3125, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.41410352588147037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.177262429497986, + "kl": 0.0114898681640625, + "learning_rate": 7.559215126563391e-07, + "loss": -0.0277, + "num_tokens": 74916648.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9935610294342041, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12136078778718966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09341545264465391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1046156988431681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1383.75, + "completions/mean_terminated_length": 1330.9091796875, + "completions/min_length": 1238.0, + "completions/min_terminated_length": 1238.0, + "epoch": 0.4143535883970993, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.752625444783915, + "kl": 0.0122528076171875, + "learning_rate": 7.555721663141512e-07, + "loss": 0.0185, + "num_tokens": 74975124.0, + "reward": 0.0, + "reward_std": 0.8692355155944824, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10981046364813994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2507074947941006, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 1011.9375, + "completions/mean_terminated_length": 979.4000244140625, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.41460365091272816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4617453689126902, + "kl": 0.0112762451171875, + "learning_rate": 7.552226633328067e-07, + "loss": 0.0079, + "num_tokens": 75009307.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8644049167633057, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0730910912854593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06441869141968827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1165.5625, + "completions/mean_terminated_length": 1165.5625, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.4148537134283571, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6783559100628342, + "kl": 0.0276947021484375, + "learning_rate": 7.548730039786146e-07, + "loss": -0.0058, + "num_tokens": 75048132.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8120135068893433, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00023706675352496332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03881604174047323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1142.0625, + "completions/mean_terminated_length": 1059.4615478515625, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.415103775943986, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8921722529973684, + "kl": 0.011016845703125, + "learning_rate": 7.545231885180044e-07, + "loss": -0.0664, + "num_tokens": 75087045.0, + "reward": -6.705522537231445e-08, + "reward_std": 1.0464140176773071, + "rewards/wordcountpos_reward_GEOBench/mean": -6.705522537231445e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07314880634828491, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18538615840530656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 939.875, + "completions/mean_terminated_length": 902.5333862304688, + "completions/min_length": 483.0, + "completions/min_terminated_length": 483.0, + "epoch": 0.4153538384596149, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.080031640108799, + "kl": 0.011920928955078125, + "learning_rate": 7.541732172175231e-07, + "loss": -0.0569, + "num_tokens": 75133651.0, + "reward": 0.0, + "reward_std": 0.8746100664138794, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1135869870272698, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10133813845812673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282607, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1271.8125, + "completions/mean_terminated_length": 1239.21435546875, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.4156039009752438, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3688912820288666, + "kl": 0.01641845703125, + "learning_rate": 7.538230903438378e-07, + "loss": 0.0155, + "num_tokens": 75190080.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0080353021621704, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.180892398371815, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11637301077520698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1299.8125, + "completions/mean_terminated_length": 1253.615478515625, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.4158539634908727, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.204496675491254, + "kl": 0.018035888671875, + "learning_rate": 7.534728081637333e-07, + "loss": -0.0102, + "num_tokens": 75237381.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.71047443151474, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.054445826382074014, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060196701887033806, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1019.875, + "completions/mean_terminated_length": 1019.875, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.41610402600650165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4872736702709664, + "kl": 0.0125274658203125, + "learning_rate": 7.531223709441129e-07, + "loss": 0.0021, + "num_tokens": 75266867.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.528732180595398, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.33853367021449576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23881988089570744, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1232.875, + "completions/mean_terminated_length": 1215.0667724609375, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.4163540885221305, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7234110731375165, + "kl": 0.012359619140625, + "learning_rate": 7.527717789519981e-07, + "loss": 0.0171, + "num_tokens": 75316817.0, + "reward": 0.0, + "reward_std": 0.8514928817749023, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06259640181184897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19706353545756353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1130.9375, + "completions/mean_terminated_length": 1078.21435546875, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.41660415103775944, + "frac_reward_zero_std": 0.0, + "grad_norm": 352.69591569909403, + "kl": 0.1099090576171875, + "learning_rate": 7.524210324545288e-07, + "loss": 0.004, + "num_tokens": 75367240.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0536458492279053, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.061090382015045616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10303892413156415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1327.25, + "completions/mean_terminated_length": 1105.1429443359375, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.41685421355338836, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.14048655300006, + "kl": 0.00937652587890625, + "learning_rate": 7.520701317189615e-07, + "loss": 0.0227, + "num_tokens": 75404524.0, + "reward": 0.0, + "reward_std": 0.8738413453102112, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04956228342438993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09008881667279273, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14751020052613062, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1284.3125, + "completions/mean_terminated_length": 1116.5555419921875, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.41710427606901723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.046464100520718, + "kl": 0.013275146484375, + "learning_rate": 7.517190770126713e-07, + "loss": -0.0153, + "num_tokens": 75452937.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0508009195327759, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10736465454999562, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2149182754205662, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1350.375, + "completions/mean_terminated_length": 1200.75, + "completions/min_length": 1167.0, + "completions/min_terminated_length": 1167.0, + "epoch": 0.41735433858464616, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.853569878077144, + "kl": 0.00395965576171875, + "learning_rate": 7.513678686031503e-07, + "loss": 0.0202, + "num_tokens": 75501447.0, + "reward": 0.0, + "reward_std": 0.8619295358657837, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1608095071520528, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16531802563030085, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1065.75, + "completions/mean_terminated_length": 1065.75, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.4176044011002751, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7848027567508638, + "kl": 0.0147247314453125, + "learning_rate": 7.510165067580072e-07, + "loss": 0.0112, + "num_tokens": 75534667.0, + "reward": 0.0, + "reward_std": 0.6430781483650208, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047475256385602474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12432251257627341, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1034.5, + "completions/mean_terminated_length": 1003.4667358398438, + "completions/min_length": 545.0, + "completions/min_terminated_length": 545.0, + "epoch": 0.41785446361590395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.639206938731161, + "kl": 0.01300048828125, + "learning_rate": 7.506649917449684e-07, + "loss": -0.1419, + "num_tokens": 75574179.0, + "reward": 0.0, + "reward_std": 1.0350569486618042, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12258390344850577, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13718264877575484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1165.0, + "completions/mean_length": 1211.25, + "completions/mean_terminated_length": 1038.0, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.41810452613153287, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8015411511155777, + "kl": 0.012176513671875, + "learning_rate": 7.503133238318765e-07, + "loss": 0.0016, + "num_tokens": 75622439.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.88728928565979, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.061826483782218784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04035799000724675, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1226.5625, + "completions/mean_terminated_length": 1226.5625, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.4183545886471618, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5419084887981236, + "kl": 0.0146636962890625, + "learning_rate": 7.499615032866909e-07, + "loss": -0.0098, + "num_tokens": 75667656.0, + "reward": 0.0, + "reward_std": 0.9284898638725281, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19476320645906825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19560001489884796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1232.625, + "completions/mean_terminated_length": 1170.923095703125, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.4186046511627907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3677810562328387, + "kl": 0.019439697265625, + "learning_rate": 7.496095303774869e-07, + "loss": -0.0123, + "num_tokens": 75721858.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0121711492538452, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0575269395411657, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1194371664054065, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045224, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1365.375, + "completions/mean_terminated_length": 1260.6666259765625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.4188547136784196, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0169423724488693, + "kl": 0.012054443359375, + "learning_rate": 7.492574053724566e-07, + "loss": 0.0173, + "num_tokens": 75772464.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0512410402297974, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015037599218082807, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028510167474891296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 960.9375, + "completions/mean_terminated_length": 925.0000610351562, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.4191047761940485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.973655914597154, + "kl": 0.016510009765625, + "learning_rate": 7.489051285399072e-07, + "loss": 0.0086, + "num_tokens": 75808975.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8450506925582886, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027405335450309667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04364323042812711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1223.9375, + "completions/mean_terminated_length": 1184.5, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.41935483870967744, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2453231755059297, + "kl": 0.0141143798828125, + "learning_rate": 7.48552700148262e-07, + "loss": -0.0076, + "num_tokens": 75846510.0, + "reward": 0.0, + "reward_std": 1.0259451866149902, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018928757776922846, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07616112908901099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1166666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1007.5, + "completions/mean_terminated_length": 1007.5, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.4196049012253063, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6445059381288125, + "kl": 0.013580322265625, + "learning_rate": 7.482001204660598e-07, + "loss": 0.0258, + "num_tokens": 75881886.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9271225929260254, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005174079518608373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060423976352819674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 968.8125, + "completions/mean_terminated_length": 968.8125, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.4198549637409352, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.588953746070299, + "kl": 0.00864410400390625, + "learning_rate": 7.478473897619545e-07, + "loss": -0.0322, + "num_tokens": 75906371.0, + "reward": 0.0, + "reward_std": 0.6871511936187744, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027500029804884238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04518069376347753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1273.0, + "completions/mean_length": 1087.0, + "completions/mean_terminated_length": 1028.0, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.42010502625656415, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.976500688172902, + "kl": 0.014862060546875, + "learning_rate": 7.474945083047152e-07, + "loss": 0.0096, + "num_tokens": 75946907.0, + "reward": 0.0, + "reward_std": 0.7001305818557739, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03337728606551483, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1165142254640259, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1087.125, + "completions/mean_terminated_length": 1059.60009765625, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.4203550887721931, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8852645229207696, + "kl": 0.0149688720703125, + "learning_rate": 7.471414763632257e-07, + "loss": -0.0303, + "num_tokens": 75986373.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0073473453521729, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.059286178166472955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04811381485654346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 998.6875, + "completions/mean_terminated_length": 998.6875, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.42060515128782194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.443061894118909, + "kl": 0.016448974609375, + "learning_rate": 7.467882942064849e-07, + "loss": 0.0307, + "num_tokens": 76023112.0, + "reward": 0.0, + "reward_std": 0.666774570941925, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06464804285158501, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2868455779581023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1250.125, + "completions/mean_terminated_length": 1166.8333740234375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.42085521380345087, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3520773081283024, + "kl": 0.013092041015625, + "learning_rate": 7.464349621036057e-07, + "loss": 0.0099, + "num_tokens": 76072506.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8601709604263306, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006751471774873923, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0859148319344969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1309.9375, + "completions/mean_terminated_length": 1282.7857666015625, + "completions/min_length": 1030.0, + "completions/min_terminated_length": 1030.0, + "epoch": 0.4211052763190798, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5922912648756546, + "kl": 0.01165771484375, + "learning_rate": 7.460814803238155e-07, + "loss": -0.0217, + "num_tokens": 76113609.0, + "reward": 0.0, + "reward_std": 0.7738279104232788, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10090979020775993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05932376098735716, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1161.0, + "completions/max_terminated_length": 1161.0, + "completions/mean_length": 964.1875, + "completions/mean_terminated_length": 964.1875, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.42135533883470866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5849549743152753, + "kl": 0.012481689453125, + "learning_rate": 7.457278491364553e-07, + "loss": -0.0199, + "num_tokens": 76144036.0, + "reward": 0.0, + "reward_std": 0.8819991946220398, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1049069222084735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17179637036653558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04791968589521739, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1204.625, + "completions/mean_terminated_length": 1106.166748046875, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.4216054013503376, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7781217292552705, + "kl": 0.00997161865234375, + "learning_rate": 7.453740688109809e-07, + "loss": 0.0173, + "num_tokens": 76184190.0, + "reward": 0.0, + "reward_std": 0.9381718635559082, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02792995423312164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0468694629524883, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1143.5625, + "completions/mean_terminated_length": 1143.5625, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.4218554638659665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.749216294450825, + "kl": 0.0162353515625, + "learning_rate": 7.450201396169608e-07, + "loss": -0.0525, + "num_tokens": 76234927.0, + "reward": 0.0, + "reward_std": 0.5604196786880493, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058426604084554024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10932292000659724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1032.75, + "completions/mean_terminated_length": 1032.75, + "completions/min_length": 735.0, + "completions/min_terminated_length": 735.0, + "epoch": 0.4221055263815954, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.54721177482292, + "kl": 0.0163726806640625, + "learning_rate": 7.446660618240773e-07, + "loss": 0.0234, + "num_tokens": 76268723.0, + "reward": 0.0, + "reward_std": 0.7959558963775635, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1331995904975732, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2206596147612983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1424.9375, + "completions/mean_terminated_length": 1379.9000244140625, + "completions/min_length": 1179.0, + "completions/min_terminated_length": 1179.0, + "epoch": 0.4223555888972243, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6184243170295427, + "kl": 0.0108184814453125, + "learning_rate": 7.443118357021259e-07, + "loss": 0.0057, + "num_tokens": 76317250.0, + "reward": 0.0, + "reward_std": 0.45646435022354126, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045059351073713325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06750889359787252, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1090.875, + "completions/mean_terminated_length": 1090.875, + "completions/min_length": 599.0, + "completions/min_terminated_length": 599.0, + "epoch": 0.4226056514128532, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.570877935720824, + "kl": 0.0176849365234375, + "learning_rate": 7.439574615210152e-07, + "loss": -0.0199, + "num_tokens": 76366976.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.762123703956604, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0628639270331137, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07138934257799437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1148.5, + "completions/mean_terminated_length": 1125.0667724609375, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.42285571392848215, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6253306355021957, + "kl": 0.01507568359375, + "learning_rate": 7.436029395507665e-07, + "loss": 0.0062, + "num_tokens": 76419776.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9695255756378174, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1404.875, + "completions/mean_terminated_length": 1282.571533203125, + "completions/min_length": 1088.0, + "completions/min_terminated_length": 1088.0, + "epoch": 0.423105776444111, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8550875094893957, + "kl": 0.01202392578125, + "learning_rate": 7.432482700615137e-07, + "loss": -0.0461, + "num_tokens": 76467006.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.933671772480011, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05129599723142419, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19443267719062804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1078.0, + "completions/max_terminated_length": 1078.0, + "completions/mean_length": 890.0, + "completions/mean_terminated_length": 890.0, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.42335583895973994, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.975435807586785, + "kl": 0.014739990234375, + "learning_rate": 7.428934533235035e-07, + "loss": -0.0382, + "num_tokens": 76506838.0, + "reward": 0.0, + "reward_std": 1.0381333827972412, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02454417269728037, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07265462035688092, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590968, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1216.6875, + "completions/mean_terminated_length": 1197.800048828125, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.42360590147536886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2810816333412793, + "kl": 0.0135650634765625, + "learning_rate": 7.425384896070939e-07, + "loss": 0.0189, + "num_tokens": 76550049.0, + "reward": 0.0, + "reward_std": 0.8792513012886047, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06005866977550446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05571149837005707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1403.375, + "completions/mean_terminated_length": 1279.1429443359375, + "completions/min_length": 1052.0, + "completions/min_terminated_length": 1052.0, + "epoch": 0.42385596399099773, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8430440315917709, + "kl": 0.008087158203125, + "learning_rate": 7.421833791827557e-07, + "loss": 0.0306, + "num_tokens": 76605703.0, + "reward": 0.0, + "reward_std": 0.9814079999923706, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05391611291083663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0706107881880112, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1323.375, + "completions/mean_terminated_length": 1243.0909423828125, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.42410602650662665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.123633924245886, + "kl": 0.0119476318359375, + "learning_rate": 7.418281223210716e-07, + "loss": -0.0156, + "num_tokens": 76660061.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.050215482711792, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06307049256187527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06613018455028405, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1045.3125, + "completions/mean_terminated_length": 1045.3125, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.4243560890222556, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2051728273277793, + "kl": 0.01251220703125, + "learning_rate": 7.414727192927351e-07, + "loss": -0.0521, + "num_tokens": 76709986.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9851381778717041, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0127984887051877, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06733280974314439, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1125.0, + "completions/mean_length": 1276.625, + "completions/mean_terminated_length": 1053.25, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.42460615153788445, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1376703560829973, + "kl": 0.01556396484375, + "learning_rate": 7.411171703685514e-07, + "loss": 0.0017, + "num_tokens": 76765156.0, + "reward": 0.0, + "reward_std": 0.8955140113830566, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.038773185186371975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1603149618922244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 941.875, + "completions/mean_terminated_length": 941.875, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.42485621405351337, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9686181842188124, + "kl": 0.010650634765625, + "learning_rate": 7.407614758194373e-07, + "loss": -0.0462, + "num_tokens": 76809794.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0385222434997559, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06093803126737424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.254631958352538, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1105.0625, + "completions/mean_terminated_length": 1078.7333984375, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.4251062765691423, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.694112506737599, + "kl": 0.009075164794921875, + "learning_rate": 7.4040563591642e-07, + "loss": 0.0143, + "num_tokens": 76860963.0, + "reward": 0.0, + "reward_std": 0.9114781618118286, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07560150087566755, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09182842342579518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1499.4375, + "completions/mean_terminated_length": 1491.0, + "completions/min_length": 1491.0, + "completions/min_terminated_length": 1491.0, + "epoch": 0.4253563390847712, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2532617245789583, + "kl": 0.00901031494140625, + "learning_rate": 7.400496509306378e-07, + "loss": 0.0005, + "num_tokens": 76925458.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0568445920944214, + "rewards/wordcountpos_reward_GEOBench/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035137096997930044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04906992017892229, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 896.0625, + "completions/mean_terminated_length": 896.0625, + "completions/min_length": 575.0, + "completions/min_terminated_length": 575.0, + "epoch": 0.4256064016004001, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.749236576084285, + "kl": 0.0124053955078125, + "learning_rate": 7.396935211333395e-07, + "loss": -0.0099, + "num_tokens": 76953827.0, + "reward": 0.0, + "reward_std": 0.7670438885688782, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07372152712475051, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09683552252029443, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1026.0, + "completions/mean_terminated_length": 994.4000244140625, + "completions/min_length": 275.0, + "completions/min_terminated_length": 275.0, + "epoch": 0.425856464116029, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.851252886317879, + "kl": 0.0128173828125, + "learning_rate": 7.393372467958838e-07, + "loss": -0.167, + "num_tokens": 76990851.0, + "reward": 0.0, + "reward_std": 0.6378856897354126, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015025376222257476, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07540394401121309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1193.0, + "completions/max_terminated_length": 1193.0, + "completions/mean_length": 994.625, + "completions/mean_terminated_length": 994.625, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.42610652663165793, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8978465067477184, + "kl": 0.01019287109375, + "learning_rate": 7.389808281897402e-07, + "loss": 0.0173, + "num_tokens": 77033085.0, + "reward": 0.0, + "reward_std": 0.7430769205093384, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07863594656246167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12221623712776605, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1257.25, + "completions/mean_terminated_length": 1201.2308349609375, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.4263565891472868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2069804609260943, + "kl": 0.0113525390625, + "learning_rate": 7.38624265586488e-07, + "loss": 0.0291, + "num_tokens": 77077481.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0354727506637573, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04745677686696955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06671713464725833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1217.0625, + "completions/mean_terminated_length": 1217.0625, + "completions/min_length": 1059.0, + "completions/min_terminated_length": 1059.0, + "epoch": 0.4266066516629157, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0088637604468436, + "kl": 0.013427734375, + "learning_rate": 7.382675592578156e-07, + "loss": -0.0001, + "num_tokens": 77117018.0, + "reward": 0.0, + "reward_std": 0.8761802911758423, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10425917811159001, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11893819780068318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1314.3125, + "completions/mean_terminated_length": 1229.9091796875, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.42685671417854465, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9380605851417436, + "kl": 0.015838623046875, + "learning_rate": 7.379107094755216e-07, + "loss": -0.0337, + "num_tokens": 77167799.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5786500573158264, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10940781069694486, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15194525267366268, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1095.9375, + "completions/mean_terminated_length": 1095.9375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.4271067766941735, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.004245420592507, + "kl": 0.0107269287109375, + "learning_rate": 7.375537165115137e-07, + "loss": -0.0132, + "num_tokens": 77196694.0, + "reward": -4.0978193283081055e-08, + "reward_std": 0.8819522261619568, + "rewards/wordcountpos_reward_GEOBench/mean": -4.0978193283081055e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011007469059136209, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03084885559833136, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1307.625, + "completions/mean_terminated_length": 1243.5, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.42735683920980244, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6462178358478408, + "kl": 0.011932373046875, + "learning_rate": 7.371965806378088e-07, + "loss": -0.0265, + "num_tokens": 77249184.0, + "reward": 0.0, + "reward_std": 1.0420149564743042, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029018442732793238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0577382897276513, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1063.4375, + "completions/mean_terminated_length": 1063.4375, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.42760690172543137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.605377153606494, + "kl": 0.0142364501953125, + "learning_rate": 7.36839302126532e-07, + "loss": -0.0411, + "num_tokens": 77296679.0, + "reward": 0.0, + "reward_std": 0.9608206748962402, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11887425479182742, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09131367292165721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1107.0, + "completions/max_terminated_length": 1107.0, + "completions/mean_length": 867.0625, + "completions/mean_terminated_length": 867.0625, + "completions/min_length": 554.0, + "completions/min_terminated_length": 554.0, + "epoch": 0.4278569642410603, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.112921804976062, + "kl": 0.0234375, + "learning_rate": 7.364818812499184e-07, + "loss": -0.0486, + "num_tokens": 77326360.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0443978309631348, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03813971622228321, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050477600644084686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 952.5, + "completions/mean_terminated_length": 952.5, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.42810702675668916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0869224386002947, + "kl": 0.00925445556640625, + "learning_rate": 7.361243182803104e-07, + "loss": 0.022, + "num_tokens": 77362904.0, + "reward": 0.0, + "reward_std": 0.6350979208946228, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07799173056752835, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23144513332730576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15389991938004774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1285.1875, + "completions/mean_terminated_length": 1213.5833740234375, + "completions/min_length": 1057.0, + "completions/min_terminated_length": 1057.0, + "epoch": 0.4283570892723181, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.536567732158228, + "kl": 0.014007568359375, + "learning_rate": 7.35766613490159e-07, + "loss": -0.0009, + "num_tokens": 77395691.0, + "reward": 0.0, + "reward_std": 0.8846445083618164, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004146092875305404, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08532619555189487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689035, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1062.5, + "completions/mean_terminated_length": 1062.5, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.428607151787947, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7538269250157184, + "kl": 0.020294189453125, + "learning_rate": 7.354087671520237e-07, + "loss": -0.0567, + "num_tokens": 77438083.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.028388500213623, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1846293265482696, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31719962724717665, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1014.0, + "completions/mean_terminated_length": 1014.0, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.4288572143035759, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8805665008034245, + "kl": 0.016998291015625, + "learning_rate": 7.350507795385712e-07, + "loss": -0.0318, + "num_tokens": 77477163.0, + "reward": 0.0, + "reward_std": 0.8361481428146362, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03416819974620741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044358444219603416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1276.5, + "completions/mean_terminated_length": 1224.923095703125, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.4291072768192048, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7126129603030376, + "kl": 0.0112762451171875, + "learning_rate": 7.346926509225764e-07, + "loss": -0.0341, + "num_tokens": 77520147.0, + "reward": 0.0, + "reward_std": 0.8212155103683472, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1874063548795113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2081588523007934, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1049.0, + "completions/max_terminated_length": 1049.0, + "completions/mean_length": 882.25, + "completions/mean_terminated_length": 882.25, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.4293573393348337, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.73297604009859, + "kl": 0.0142669677734375, + "learning_rate": 7.343343815769214e-07, + "loss": -0.0338, + "num_tokens": 77555727.0, + "reward": 0.0, + "reward_std": 1.0215754508972168, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039226991674885886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06510774110541348, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 979.6875, + "completions/mean_terminated_length": 979.6875, + "completions/min_length": 632.0, + "completions/min_terminated_length": 632.0, + "epoch": 0.4296074018504626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.978206317028288, + "kl": 0.0184478759765625, + "learning_rate": 7.339759717745952e-07, + "loss": -0.0392, + "num_tokens": 77597850.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6792944669723511, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19666774570664602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0866628265154293, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1142.5625, + "completions/mean_terminated_length": 1142.5625, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.4298574643660915, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8705708134504007, + "kl": 0.01568603515625, + "learning_rate": 7.336174217886944e-07, + "loss": -0.0486, + "num_tokens": 77643299.0, + "reward": 0.0, + "reward_std": 1.0433552265167236, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030748640799835913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12408752381645217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1109.0, + "completions/mean_terminated_length": 1082.933349609375, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.43010752688172044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3070519347331673, + "kl": 0.0147705078125, + "learning_rate": 7.332587318924225e-07, + "loss": -0.0398, + "num_tokens": 77674963.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9584343433380127, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014707138839407833, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0826795584417147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 992.25, + "completions/mean_terminated_length": 992.25, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.43035758939734936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7680225960427114, + "kl": 0.0172271728515625, + "learning_rate": 7.328999023590886e-07, + "loss": -0.0349, + "num_tokens": 77728135.0, + "reward": 0.0, + "reward_std": 0.9037297964096069, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1114204587586881, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17379306814169848, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1222.25, + "completions/mean_terminated_length": 1158.1539306640625, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.43060765191297823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6786290392066037, + "kl": 0.0091705322265625, + "learning_rate": 7.325409334621094e-07, + "loss": -0.0539, + "num_tokens": 77763867.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0488533973693848, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08508669231626333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08362584346829638, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1140.0625, + "completions/mean_terminated_length": 1088.6429443359375, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.43085771442860715, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6573405994979438, + "kl": 0.00887298583984375, + "learning_rate": 7.32181825475007e-07, + "loss": -0.0095, + "num_tokens": 77806444.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0274724960327148, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015696922008556134, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03885191986265283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1131.75, + "completions/mean_terminated_length": 1107.2000732421875, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.4311077769442361, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1339277926820523, + "kl": 0.0120849609375, + "learning_rate": 7.318225786714096e-07, + "loss": -0.0621, + "num_tokens": 77839760.0, + "reward": 0.0, + "reward_std": 0.9213806390762329, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0105053334906302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.038765485455977564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1077.0, + "completions/max_terminated_length": 1077.0, + "completions/mean_length": 842.1875, + "completions/mean_terminated_length": 842.1875, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.43135783945986494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4960659357896757, + "kl": 0.0141448974609375, + "learning_rate": 7.314631933250519e-07, + "loss": -0.0038, + "num_tokens": 77881723.0, + "reward": 0.0, + "reward_std": 0.9342890977859497, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023563738181330045, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055255357984741654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1179.5625, + "completions/mean_terminated_length": 1179.5625, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.43160790197549387, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4904885454942765, + "kl": 0.0081024169921875, + "learning_rate": 7.311036697097731e-07, + "loss": -0.022, + "num_tokens": 77922460.0, + "reward": 0.0, + "reward_std": 0.8332759141921997, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2173509805689877, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2192598061444211, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545036, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1154.0625, + "completions/mean_terminated_length": 1154.0625, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.4318579644911228, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3728753049874474, + "kl": 0.01495361328125, + "learning_rate": 7.307440080995183e-07, + "loss": 0.0222, + "num_tokens": 77978557.0, + "reward": 0.0, + "reward_std": 0.5775347948074341, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02177433295852809, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12272861742177738, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1301.375, + "completions/mean_terminated_length": 1182.2000732421875, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.43210802700675166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9215357695538873, + "kl": 0.01129150390625, + "learning_rate": 7.303842087683378e-07, + "loss": 0.0199, + "num_tokens": 78031587.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6386969089508057, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07208395989274538, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08027058004469234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1090.0, + "completions/max_terminated_length": 1090.0, + "completions/mean_length": 966.9375, + "completions/mean_terminated_length": 966.9375, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.4323580895223806, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.782385840436489, + "kl": 0.0137939453125, + "learning_rate": 7.300242719903869e-07, + "loss": -0.0105, + "num_tokens": 78073178.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.932848334312439, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22527287549760217, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2249712670783646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1226.0, + "completions/mean_terminated_length": 1162.769287109375, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.4326081520380095, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9581604616448964, + "kl": 0.014404296875, + "learning_rate": 7.29664198039925e-07, + "loss": 0.0587, + "num_tokens": 78122490.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0319743156433105, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03569536733034149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15626830022715701, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1103.125, + "completions/mean_terminated_length": 1046.4285888671875, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.43285821455363843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.678119502563029, + "kl": 0.0173797607421875, + "learning_rate": 7.293039871913168e-07, + "loss": 0.0342, + "num_tokens": 78157244.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0179539918899536, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06533908451181936, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03683203596842986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1116.0625, + "completions/mean_terminated_length": 1090.4666748046875, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.4331082770692673, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.273245854042366, + "kl": 0.0160064697265625, + "learning_rate": 7.28943639719031e-07, + "loss": -0.0203, + "num_tokens": 78204565.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0118680000305176, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.044196710581532475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06610571614448972, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0910840068085298, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1221.0625, + "completions/mean_terminated_length": 1202.4666748046875, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.4333583395848962, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1566576389389596, + "kl": 0.0133056640625, + "learning_rate": 7.285831558976403e-07, + "loss": 0.0247, + "num_tokens": 78257990.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.727614164352417, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1414838467664965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0854671725564654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1124.0, + "completions/mean_terminated_length": 1124.0, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.43360840210052515, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2609625937819455, + "kl": 0.0110931396484375, + "learning_rate": 7.282225360018214e-07, + "loss": 0.0388, + "num_tokens": 78303630.0, + "reward": 0.0, + "reward_std": 0.8154395222663879, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006490947511628318, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16363698797396378, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14580555290954889, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1321.5, + "completions/mean_terminated_length": 1296.0, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.433858464616154, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.471842414004994, + "kl": 0.011627197265625, + "learning_rate": 7.278617803063547e-07, + "loss": -0.0465, + "num_tokens": 78353286.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0611933469772339, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09297876635573582, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13299693401042684, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1446.3125, + "completions/mean_terminated_length": 1356.8333740234375, + "completions/min_length": 1251.0, + "completions/min_terminated_length": 1251.0, + "epoch": 0.43410852713178294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.172860557073858, + "kl": 0.0118865966796875, + "learning_rate": 7.275008890861239e-07, + "loss": -0.003, + "num_tokens": 78409323.0, + "reward": 3.3527612686157227e-08, + "reward_std": 1.0643044710159302, + "rewards/wordcountpos_reward_GEOBench/mean": 3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07794313527846314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1077587507249419, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1134.3125, + "completions/mean_terminated_length": 1134.3125, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.43435858964741186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3724610900387324, + "kl": 0.013824462890625, + "learning_rate": 7.271398626161165e-07, + "loss": 0.0068, + "num_tokens": 78456912.0, + "reward": 0.0, + "reward_std": 1.0006873607635498, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002685526194033694, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14055960844739768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1154.8125, + "completions/mean_terminated_length": 1075.1539306640625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.4346086521630408, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.376997089586702, + "kl": 0.00969696044921875, + "learning_rate": 7.267787011714224e-07, + "loss": 0.0115, + "num_tokens": 78492717.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7927964329719543, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03103645786805943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09060260624124969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15389991938004774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1496.4375, + "completions/mean_terminated_length": 1443.0, + "completions/min_length": 1443.0, + "completions/min_terminated_length": 1443.0, + "epoch": 0.43485871467866966, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8630035660814754, + "kl": 0.00738525390625, + "learning_rate": 7.264174050272343e-07, + "loss": 0.0021, + "num_tokens": 78554164.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8677337169647217, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0057834860958937105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024919294725081597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1331.0625, + "completions/mean_terminated_length": 1254.272705078125, + "completions/min_length": 1135.0, + "completions/min_terminated_length": 1135.0, + "epoch": 0.4351087771942986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3069344938069767, + "kl": 0.016326904296875, + "learning_rate": 7.260559744588484e-07, + "loss": 0.0047, + "num_tokens": 78611877.0, + "reward": 0.0, + "reward_std": 0.858962893486023, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0690971168937208, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1326554517440602, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1222.625, + "completions/mean_terminated_length": 1158.615478515625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.4353588397099275, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0515222299668214, + "kl": 0.0122528076171875, + "learning_rate": 7.256944097416626e-07, + "loss": -0.031, + "num_tokens": 78655519.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.067093849182129, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0668524912496514, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06855441173300109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1383.6875, + "completions/mean_terminated_length": 1313.9000244140625, + "completions/min_length": 584.0, + "completions/min_terminated_length": 584.0, + "epoch": 0.43560890222555637, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9981414299940847, + "kl": 0.0107879638671875, + "learning_rate": 7.253327111511771e-07, + "loss": -0.0687, + "num_tokens": 78710290.0, + "reward": 0.0, + "reward_std": 0.9842308759689331, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029425682990739516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11129605748493877, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1231.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 1068.5, + "completions/mean_terminated_length": 1068.5, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.4358589647411853, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6021120973529204, + "kl": 0.0134429931640625, + "learning_rate": 7.249708789629944e-07, + "loss": -0.067, + "num_tokens": 78753682.0, + "reward": 0.0, + "reward_std": 0.7004961371421814, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02301237569544516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02969000990778225, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 947.125, + "completions/mean_terminated_length": 947.125, + "completions/min_length": 471.0, + "completions/min_terminated_length": 471.0, + "epoch": 0.4361090272568142, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7194505420430772, + "kl": 0.01190185546875, + "learning_rate": 7.246089134528183e-07, + "loss": -0.0136, + "num_tokens": 78791420.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.591182291507721, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07895784508337285, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1443797916596803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125757, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1043.3125, + "completions/mean_terminated_length": 978.0714721679688, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.4363590897724431, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0314524634729025, + "kl": 0.0123291015625, + "learning_rate": 7.242468148964545e-07, + "loss": 0.0303, + "num_tokens": 78828473.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0530834197998047, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12349324220457468, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10156307809411594, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1137.25, + "completions/mean_terminated_length": 1137.25, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.436609152288072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0132151994270084, + "kl": 0.0108489990234375, + "learning_rate": 7.238845835698104e-07, + "loss": 0.0162, + "num_tokens": 78864397.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.035042643547058, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042735438098999864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09639567624559706, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1118.375, + "completions/mean_terminated_length": 1063.857177734375, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.43685921480370093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4635750865558013, + "kl": 0.01702880859375, + "learning_rate": 7.235222197488941e-07, + "loss": 0.0866, + "num_tokens": 78900355.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6471539735794067, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004941797434114841, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09014994215246054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 1270.25, + "completions/mean_terminated_length": 974.857177734375, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.43710927731932986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.145723637158008, + "kl": 0.015960693359375, + "learning_rate": 7.231597237098145e-07, + "loss": 0.0558, + "num_tokens": 78952599.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9158209562301636, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11784004952869948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15605331871893838, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1409.875, + "completions/mean_terminated_length": 1139.5, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.4373593398349587, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7650200492562895, + "kl": 0.02520751953125, + "learning_rate": 7.227970957287819e-07, + "loss": 0.0002, + "num_tokens": 79023157.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9558037519454956, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11323105500331629, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1207050315318295, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1214.3125, + "completions/mean_terminated_length": 1148.3846435546875, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.43760940235058765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9934471816747847, + "kl": 0.0143280029296875, + "learning_rate": 7.224343360821066e-07, + "loss": 0.0229, + "num_tokens": 79064114.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.891314685344696, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06803668024025236, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13222488295584306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1135.3125, + "completions/mean_terminated_length": 1135.3125, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.4378594648662166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.747873574292098, + "kl": 0.0130767822265625, + "learning_rate": 7.220714450461993e-07, + "loss": -0.008, + "num_tokens": 79109207.0, + "reward": 0.0, + "reward_std": 0.7631886601448059, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3598512854319067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4105622246466347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1271.9375, + "completions/mean_terminated_length": 1135.0999755859375, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.43810952738184544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.000091741425933, + "kl": 0.0129547119140625, + "learning_rate": 7.217084228975711e-07, + "loss": 0.0045, + "num_tokens": 79159094.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8642512559890747, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0327719735472972, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11548826424730115, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1280.125, + "completions/mean_terminated_length": 1248.71435546875, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.43835958989747437, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1869532214133622, + "kl": 0.016021728515625, + "learning_rate": 7.213452699128328e-07, + "loss": -0.0073, + "num_tokens": 79203384.0, + "reward": 0.0, + "reward_std": 0.7487567067146301, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030608778313506585, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03498904794886396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1239.1875, + "completions/mean_terminated_length": 1221.800048828125, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.4386096524131033, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8740072292904326, + "kl": 0.0133056640625, + "learning_rate": 7.209819863686946e-07, + "loss": -0.055, + "num_tokens": 79254659.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9088636040687561, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01809540558727736, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0627926328259557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1025.375, + "completions/mean_terminated_length": 993.7333984375, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.43885971492873216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6632581837266223, + "kl": 0.016021728515625, + "learning_rate": 7.20618572541967e-07, + "loss": -0.1031, + "num_tokens": 79296833.0, + "reward": 0.0, + "reward_std": 0.9442328810691833, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13237955052559283, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2620201491453505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1140.3125, + "completions/mean_terminated_length": 1057.3077392578125, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.4391097774443611, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.255747570845199, + "kl": 0.0129241943359375, + "learning_rate": 7.202550287095589e-07, + "loss": 0.0504, + "num_tokens": 79339438.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9310458898544312, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040827301420741605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07184500715063119, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1290.5, + "completions/mean_terminated_length": 1081.0, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.43935983995999, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.394551494655838, + "kl": 0.009765625, + "learning_rate": 7.198913551484784e-07, + "loss": 0.0244, + "num_tokens": 79393878.0, + "reward": 0.0, + "reward_std": 0.6650184392929077, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034479111912217184, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0912146946552084, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1172.375, + "completions/mean_terminated_length": 1150.533447265625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.43960990247561893, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.99859532970736, + "kl": 0.01702880859375, + "learning_rate": 7.195275521358332e-07, + "loss": -0.0178, + "num_tokens": 79435164.0, + "reward": 0.0, + "reward_std": 0.9591063261032104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0711552130119176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09361019214319827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1132.5625, + "completions/mean_terminated_length": 1108.0667724609375, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.4398599649912478, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.158331177721008, + "kl": 0.0156402587890625, + "learning_rate": 7.191636199488288e-07, + "loss": -0.0109, + "num_tokens": 79480405.0, + "reward": 0.0, + "reward_std": 1.024285078048706, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09307064603602523, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14816055447790852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362772, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1002.375, + "completions/mean_terminated_length": 1002.375, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.4401100275068767, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4992451666166713, + "kl": 0.0125579833984375, + "learning_rate": 7.187995588647695e-07, + "loss": -0.0679, + "num_tokens": 79518443.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8065978288650513, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01849482044277656, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03679742744685715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 859.5625, + "completions/mean_terminated_length": 859.5625, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.44036009002250565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.655608366760189, + "kl": 0.01177978515625, + "learning_rate": 7.184353691610579e-07, + "loss": -0.0009, + "num_tokens": 79548004.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9474237561225891, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.054433105395181744, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 978.875, + "completions/mean_terminated_length": 978.875, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.4406101525381345, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.530099515490668, + "kl": 0.0155181884765625, + "learning_rate": 7.180710511151938e-07, + "loss": -0.0307, + "num_tokens": 79588602.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8672568202018738, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032620479787234716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12305321335687017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1204.3125, + "completions/mean_terminated_length": 1026.9000244140625, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.44086021505376344, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.32307232755478, + "kl": 0.0172119140625, + "learning_rate": 7.177066050047762e-07, + "loss": 0.0494, + "num_tokens": 79641095.0, + "reward": 0.0, + "reward_std": 0.5476629734039307, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09212123149389966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09595846924614966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 995.375, + "completions/mean_terminated_length": 923.2857666015625, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.44111027756939236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.717105145981211, + "kl": 0.010162353515625, + "learning_rate": 7.173420311075007e-07, + "loss": 0.0015, + "num_tokens": 79686133.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9487942457199097, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04123598971078048, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17308141663117674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1165.1875, + "completions/mean_terminated_length": 1087.923095703125, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.44136034008502123, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9954402670076203, + "kl": 0.00778961181640625, + "learning_rate": 7.169773297011603e-07, + "loss": -0.0191, + "num_tokens": 79732000.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.947343111038208, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03944646565691791, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12743060227867836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818419, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1097.875, + "completions/mean_terminated_length": 1097.875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.44161040260065015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6488036336604557, + "kl": 0.0115814208984375, + "learning_rate": 7.166125010636454e-07, + "loss": 0.0089, + "num_tokens": 79779998.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9248031377792358, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026102186784378076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15483447277843232, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1012.125, + "completions/mean_terminated_length": 979.6000366210938, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.4418604651162791, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.128553877170301, + "kl": 0.0128936767578125, + "learning_rate": 7.162475454729434e-07, + "loss": -0.102, + "num_tokens": 79813128.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6737817525863647, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.17127612633615985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2011099185962648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1050.625, + "completions/mean_terminated_length": 1020.666748046875, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.442110527631908, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.581598623889873, + "kl": 0.01090240478515625, + "learning_rate": 7.158824632071384e-07, + "loss": 0.049, + "num_tokens": 79844826.0, + "reward": 0.0, + "reward_std": 0.5851346850395203, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047303182137547056, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0791722273622185, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1246.3125, + "completions/mean_terminated_length": 1210.071533203125, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.44236059014753687, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.22580805672941, + "kl": 0.0157470703125, + "learning_rate": 7.155172545444107e-07, + "loss": -0.0199, + "num_tokens": 79900503.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8850240707397461, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09555074817234349, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11951263866545403, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1057.3125, + "completions/mean_terminated_length": 1027.800048828125, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.4426106526631658, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2501340459378207, + "kl": 0.0126190185546875, + "learning_rate": 7.151519197630374e-07, + "loss": -0.0633, + "num_tokens": 79941324.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9877698421478271, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08932485591503647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23240599764047565, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1254.8125, + "completions/mean_terminated_length": 1198.2308349609375, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.4428607151787947, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8947883595269728, + "kl": 0.0132904052734375, + "learning_rate": 7.147864591413912e-07, + "loss": 0.0126, + "num_tokens": 79985857.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.624397873878479, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03155135496596188, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11434994341605202, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1068.3125, + "completions/mean_terminated_length": 1006.6428833007812, + "completions/min_length": 692.0, + "completions/min_terminated_length": 692.0, + "epoch": 0.4431107776944236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8198587146071152, + "kl": 0.00920867919921875, + "learning_rate": 7.144208729579413e-07, + "loss": 0.0089, + "num_tokens": 80031766.0, + "reward": 0.0, + "reward_std": 0.702824056148529, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10789896976881103, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.30720574661146055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1201.0, + "completions/mean_terminated_length": 1201.0, + "completions/min_length": 456.0, + "completions/min_terminated_length": 456.0, + "epoch": 0.4433608402100525, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2218681929133126, + "kl": 0.0128631591796875, + "learning_rate": 7.14055161491252e-07, + "loss": -0.0331, + "num_tokens": 80078006.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9834216833114624, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01171016202617066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03379674702945372, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 1135.5625, + "completions/mean_terminated_length": 1083.5, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.44361090272568143, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0306208800040784, + "kl": 0.01593017578125, + "learning_rate": 7.136893250199832e-07, + "loss": 0.0067, + "num_tokens": 80123567.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0569403171539307, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004031387714420754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045957305255878975, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1005.5, + "completions/mean_terminated_length": 1005.5, + "completions/min_length": 605.0, + "completions/min_terminated_length": 605.0, + "epoch": 0.4438609652413103, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.577272910085154, + "kl": 0.0189971923828125, + "learning_rate": 7.133233638228901e-07, + "loss": -0.0041, + "num_tokens": 80174479.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8840045928955078, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03343107084619072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06654406692046057, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1046.25, + "completions/mean_terminated_length": 1046.25, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.4441110277569392, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.348610876938014, + "kl": 0.01678466796875, + "learning_rate": 7.129572781788233e-07, + "loss": 0.0132, + "num_tokens": 80228899.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8301129341125488, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024496591467949864, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03720918318696884, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518175, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1356.6875, + "completions/mean_terminated_length": 1213.375, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.44436109027256815, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6269234754651802, + "kl": 0.0122833251953125, + "learning_rate": 7.125910683667276e-07, + "loss": -0.013, + "num_tokens": 80277558.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6708970665931702, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04568759381623745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12001984104600781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 1777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1292.3125, + "completions/mean_terminated_length": 1244.3846435546875, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.4446111527881971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.303847689855226, + "kl": 0.0165863037109375, + "learning_rate": 7.122247346656429e-07, + "loss": -0.0197, + "num_tokens": 80323531.0, + "reward": 0.0, + "reward_std": 0.8511629104614258, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008519615147881998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09396052519361589, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1152.6875, + "completions/mean_terminated_length": 1103.071533203125, + "completions/min_length": 556.0, + "completions/min_terminated_length": 556.0, + "epoch": 0.44486121530382594, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4242022113398627, + "kl": 0.015380859375, + "learning_rate": 7.118582773547032e-07, + "loss": -0.0138, + "num_tokens": 80367718.0, + "reward": 0.0, + "reward_std": 0.776917040348053, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10029205329443859, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11241085411765138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1006.6875, + "completions/mean_terminated_length": 1006.6875, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.44511127781945486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.573025363302842, + "kl": 0.013458251953125, + "learning_rate": 7.114916967131366e-07, + "loss": -0.0785, + "num_tokens": 80407921.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0626318454742432, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020508260260716032, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07347851265227373, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1401.75, + "completions/mean_terminated_length": 1303.5, + "completions/min_length": 1138.0, + "completions/min_terminated_length": 1138.0, + "epoch": 0.4453613403350838, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7790981296348964, + "kl": 0.012420654296875, + "learning_rate": 7.111249930202657e-07, + "loss": 0.0293, + "num_tokens": 80464069.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9746863842010498, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022965894085254176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08007219371619458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04533823502911818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1238.5625, + "completions/mean_terminated_length": 1178.2308349609375, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.44561140285071266, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2932830329874676, + "kl": 0.0088043212890625, + "learning_rate": 7.107581665555062e-07, + "loss": 0.0255, + "num_tokens": 80513166.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7266916036605835, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0015385893478430325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07968069383475845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1188.25, + "completions/mean_terminated_length": 1188.25, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.4458614653663416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0811129631578256, + "kl": 0.014556884765625, + "learning_rate": 7.103912175983679e-07, + "loss": -0.0213, + "num_tokens": 80559794.0, + "reward": 0.0, + "reward_std": 0.8286514282226562, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053960180894580136, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06614597197559402, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1197.0625, + "completions/mean_terminated_length": 1127.1539306640625, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.4461115278819705, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2446396128271204, + "kl": 0.0157318115234375, + "learning_rate": 7.100241464284538e-07, + "loss": 0.0073, + "num_tokens": 80600915.0, + "reward": 0.0, + "reward_std": 1.0566080808639526, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018530748738778177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06457528417840099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1305.875, + "completions/mean_terminated_length": 1217.6363525390625, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.4463615903975994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9813437898581236, + "kl": 0.0136566162109375, + "learning_rate": 7.096569533254597e-07, + "loss": -0.032, + "num_tokens": 80653049.0, + "reward": 0.0, + "reward_std": 0.8882765769958496, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009263734489561153, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10798497885707327, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195014, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1426.3125, + "completions/mean_terminated_length": 1369.0, + "completions/min_length": 1192.0, + "completions/min_terminated_length": 1192.0, + "epoch": 0.4466116529132283, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9111830735580284, + "kl": 0.014129638671875, + "learning_rate": 7.092896385691745e-07, + "loss": -0.0045, + "num_tokens": 80702414.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5338138937950134, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030649367457600617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15351459166154627, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1338.0, + "completions/mean_terminated_length": 1240.800048828125, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.4468617154288572, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.010421628922645, + "kl": 0.013275146484375, + "learning_rate": 7.0892220243948e-07, + "loss": -0.0278, + "num_tokens": 80738806.0, + "reward": 0.0, + "reward_std": 0.6607098579406738, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14850156919333957, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12834756432933478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1042.6875, + "completions/mean_terminated_length": 1042.6875, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.44711177794448614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0792091275088906, + "kl": 0.0149078369140625, + "learning_rate": 7.085546452163503e-07, + "loss": 0.0037, + "num_tokens": 80781753.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7805219888687134, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07368400077267513, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12121137509898751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 986.75, + "completions/mean_terminated_length": 986.75, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.447361840460115, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3169074304568302, + "kl": 0.017120361328125, + "learning_rate": 7.081869671798518e-07, + "loss": -0.0366, + "num_tokens": 80820429.0, + "reward": 0.0, + "reward_std": 0.9583220481872559, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03117995752896387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1129815570032981, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1320.0625, + "completions/mean_terminated_length": 1308.0667724609375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.44761190297574394, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.56580107611222, + "kl": 0.0110931396484375, + "learning_rate": 7.078191686101428e-07, + "loss": -0.0341, + "num_tokens": 80873734.0, + "reward": 0.0, + "reward_std": 0.6024472117424011, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03291511150889182, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06833850828329827, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725108, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1347.125, + "completions/mean_terminated_length": 1228.2222900390625, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.44786196549137286, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.307648776218062, + "kl": 0.00980377197265625, + "learning_rate": 7.074512497874738e-07, + "loss": -0.038, + "num_tokens": 80927800.0, + "reward": 0.0, + "reward_std": 0.7424743175506592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008556563201803584, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06004742077236561, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1296.0, + "completions/mean_length": 1117.0625, + "completions/mean_terminated_length": 1062.357177734375, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.44811202800700173, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8661619316213836, + "kl": 0.01629638671875, + "learning_rate": 7.070832109921864e-07, + "loss": -0.0706, + "num_tokens": 80981857.0, + "reward": 0.0, + "reward_std": 0.7173156142234802, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09709224539448146, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09677121105481441, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14580555290954889, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1182.8125, + "completions/mean_terminated_length": 1182.8125, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.44836209052263065, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.627784717408285, + "kl": 0.0159149169921875, + "learning_rate": 7.067150525047143e-07, + "loss": -0.0186, + "num_tokens": 81026518.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0681921243667603, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0044280787298943145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07828677337546366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1291.8125, + "completions/mean_terminated_length": 1222.416748046875, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.4486121530382596, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.902495972454608, + "kl": 0.0142364501953125, + "learning_rate": 7.063467746055817e-07, + "loss": 0.0074, + "num_tokens": 81082211.0, + "reward": -5.21540641784668e-08, + "reward_std": 1.039199948310852, + "rewards/wordcountpos_reward_GEOBench/mean": -5.21540641784668e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005823222026836945, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07025191268027792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1339.25, + "completions/mean_terminated_length": 1214.2222900390625, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.4488622155538885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.114332829059098, + "kl": 0.01751708984375, + "learning_rate": 7.059783775754041e-07, + "loss": -0.0164, + "num_tokens": 81138383.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9422798156738281, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05889831752777307, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12869412780787987, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518177, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1305.25, + "completions/mean_terminated_length": 1240.3333740234375, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.44911227806951737, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.594331202999296, + "kl": 0.006744384765625, + "learning_rate": 7.056098616948883e-07, + "loss": 0.0051, + "num_tokens": 81193419.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.90351402759552, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025220675089580532, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06350144918563556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1474.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1263.375, + "completions/mean_terminated_length": 1263.375, + "completions/min_length": 934.0, + "completions/min_terminated_length": 934.0, + "epoch": 0.4493623405851463, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1903209565486432, + "kl": 0.0191650390625, + "learning_rate": 7.052412272448306e-07, + "loss": -0.0212, + "num_tokens": 81236145.0, + "reward": 0.0, + "reward_std": 1.0031390190124512, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06237353393247602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11106377186784396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1148.625, + "completions/mean_terminated_length": 1125.2000732421875, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.4496124031007752, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9087128384740453, + "kl": 0.01275634765625, + "learning_rate": 7.048724745061184e-07, + "loss": 0.0326, + "num_tokens": 81283691.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.032902717590332, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0949022285792119, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14708345346600446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 951.5625, + "completions/mean_terminated_length": 951.5625, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.4498624656164041, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.837001309075126, + "kl": 0.011749267578125, + "learning_rate": 7.045036037597292e-07, + "loss": -0.0113, + "num_tokens": 81314164.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9803147912025452, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021362954063347095, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04944362179433061, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1071.875, + "completions/mean_terminated_length": 1071.875, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.450112528132033, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.606919477648884, + "kl": 0.020050048828125, + "learning_rate": 7.041346152867299e-07, + "loss": -0.0333, + "num_tokens": 81350642.0, + "reward": 0.0, + "reward_std": 1.0209102630615234, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2214101026684669, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0809985609378142, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1249.0, + "completions/max_terminated_length": 1249.0, + "completions/mean_length": 986.875, + "completions/mean_terminated_length": 986.875, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.45036259064766193, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3909562165765803, + "kl": 0.0138397216796875, + "learning_rate": 7.037655093682777e-07, + "loss": 0.0195, + "num_tokens": 81380816.0, + "reward": 0.0, + "reward_std": 0.6577624678611755, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20922832187133542, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3030427282473595, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1285.875, + "completions/mean_terminated_length": 1255.2857666015625, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.4506126531632908, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0369358797511135, + "kl": 0.0125579833984375, + "learning_rate": 7.033962862856189e-07, + "loss": -0.0365, + "num_tokens": 81417710.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.47366780042648315, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005624097070751487, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09566841488684191, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.055611083361076466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1328.75, + "completions/mean_terminated_length": 1289.2308349609375, + "completions/min_length": 1170.0, + "completions/min_terminated_length": 1170.0, + "epoch": 0.4508627156789197, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.979922172831645, + "kl": 0.015899658203125, + "learning_rate": 7.030269463200893e-07, + "loss": -0.008, + "num_tokens": 81465682.0, + "reward": 0.0, + "reward_std": 0.8769499659538269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11064615040516027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21069063252905032, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1277.5625, + "completions/mean_terminated_length": 1176.45458984375, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.45111277819454865, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.107363669211601, + "kl": 0.0143280029296875, + "learning_rate": 7.026574897531135e-07, + "loss": 0.0399, + "num_tokens": 81524067.0, + "reward": 0.0, + "reward_std": 0.5496261119842529, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1552750273131809, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1293824186258128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1135.0, + "completions/max_terminated_length": 1135.0, + "completions/mean_length": 984.125, + "completions/mean_terminated_length": 984.125, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.45136284071017757, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7034733528545325, + "kl": 0.012481689453125, + "learning_rate": 7.022879168662056e-07, + "loss": 0.0122, + "num_tokens": 81564661.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.952262282371521, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05499490597414347, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07306030691563313, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1339.9375, + "completions/mean_terminated_length": 1303.0, + "completions/min_length": 1010.0, + "completions/min_terminated_length": 1010.0, + "epoch": 0.45161290322580644, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.167652520001269, + "kl": 0.0208587646484375, + "learning_rate": 7.019182279409674e-07, + "loss": 0.0047, + "num_tokens": 81621732.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6819903254508972, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14126256107169105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15277886917417788, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1228.25, + "completions/mean_terminated_length": 1228.25, + "completions/min_length": 1070.0, + "completions/min_terminated_length": 1070.0, + "epoch": 0.45186296574143536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0820336648260027, + "kl": 0.013458251953125, + "learning_rate": 7.015484232590896e-07, + "loss": 0.0165, + "num_tokens": 81667816.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6604877710342407, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08060832536562104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07064259217550571, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1088.25, + "completions/mean_terminated_length": 901.0909423828125, + "completions/min_length": 621.0, + "completions/min_terminated_length": 621.0, + "epoch": 0.4521130282570643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9411392672408136, + "kl": 0.0128936767578125, + "learning_rate": 7.011785031023511e-07, + "loss": 0.0421, + "num_tokens": 81725316.0, + "reward": 0.0, + "reward_std": 0.972832441329956, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03539061856528414, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05561642059893787, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12816366850994057, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1299.0, + "completions/max_terminated_length": 1299.0, + "completions/mean_length": 1145.9375, + "completions/mean_terminated_length": 1145.9375, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.45236309077269315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.156106363417257, + "kl": 0.015380859375, + "learning_rate": 7.008084677526189e-07, + "loss": 0.0043, + "num_tokens": 81770595.0, + "reward": 0.0, + "reward_std": 0.9694223403930664, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039001790045966975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05646049254644033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1163.25, + "completions/mean_terminated_length": 1010.1818237304688, + "completions/min_length": 505.0, + "completions/min_terminated_length": 505.0, + "epoch": 0.4526131532883221, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7036411602763786, + "kl": 0.015625, + "learning_rate": 7.004383174918477e-07, + "loss": -0.0816, + "num_tokens": 81822983.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6871200799942017, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025816511173255605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21139822330343222, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1169.0625, + "completions/mean_terminated_length": 970.5, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.452863215803951, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.821090681250282, + "kl": 0.032379150390625, + "learning_rate": 7.000680526020795e-07, + "loss": 0.0449, + "num_tokens": 81882528.0, + "reward": 0.0, + "reward_std": 0.8553345203399658, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014311164163103546, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17868124819457362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1304.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1099.625, + "completions/mean_terminated_length": 1099.625, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.45311327831957987, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3841720726829596, + "kl": 0.014923095703125, + "learning_rate": 6.996976733654439e-07, + "loss": -0.0101, + "num_tokens": 81920826.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9807734489440918, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03502607553233713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061927406764859756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1132.25, + "completions/mean_terminated_length": 1107.7333984375, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.4533633408352088, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.447963253787224, + "kl": 0.0171966552734375, + "learning_rate": 6.993271800641575e-07, + "loss": -0.0471, + "num_tokens": 81964246.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0488839149475098, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11438776559315773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13211837268360518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6499999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1150.125, + "completions/mean_terminated_length": 1126.800048828125, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.4536134033508377, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7596478752745393, + "kl": 0.0112762451171875, + "learning_rate": 6.989565729805241e-07, + "loss": -0.0351, + "num_tokens": 81998728.0, + "reward": 0.0, + "reward_std": 0.8011308908462524, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08187443568548239, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07144391658171517, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1035.1875, + "completions/mean_terminated_length": 1004.2000732421875, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.45386346586646664, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0261100190824632, + "kl": 0.01611328125, + "learning_rate": 6.985858523969337e-07, + "loss": -0.0477, + "num_tokens": 82027651.0, + "reward": 0.0, + "reward_std": 0.9581598043441772, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02131647998315056, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19647752710891742, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116196, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1329.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1095.5625, + "completions/mean_terminated_length": 1095.5625, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.4541135283820955, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.574832764685832, + "kl": 0.0159454345703125, + "learning_rate": 6.982150185958632e-07, + "loss": 0.0039, + "num_tokens": 82063980.0, + "reward": 0.0, + "reward_std": 1.044386625289917, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010222422859756514, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10181211444363265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1182.125, + "completions/mean_terminated_length": 1037.6363525390625, + "completions/min_length": 608.0, + "completions/min_terminated_length": 608.0, + "epoch": 0.45436359089772443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.982040602777878, + "kl": 0.012664794921875, + "learning_rate": 6.978440718598756e-07, + "loss": -0.0435, + "num_tokens": 82118350.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9846587181091309, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04692783311494097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14575672976674883, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1093.8125, + "completions/mean_terminated_length": 1093.8125, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.45461365341335336, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.67269809621732, + "kl": 0.017791748046875, + "learning_rate": 6.9747301247162e-07, + "loss": -0.0203, + "num_tokens": 82159011.0, + "reward": 0.0, + "reward_std": 0.6304547190666199, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03654020835897768, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05121618665724798, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1100.5625, + "completions/mean_terminated_length": 1073.933349609375, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.4548637159289822, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.910056074828761, + "kl": 0.0131072998046875, + "learning_rate": 6.971018407138314e-07, + "loss": -0.017, + "num_tokens": 82210732.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0657434463500977, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035128178578067826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13847914427857014, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1219.0, + "completions/max_terminated_length": 1219.0, + "completions/mean_length": 960.1875, + "completions/mean_terminated_length": 960.1875, + "completions/min_length": 630.0, + "completions/min_terminated_length": 630.0, + "epoch": 0.45511377844461115, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9113083974573892, + "kl": 0.0117034912109375, + "learning_rate": 6.967305568693303e-07, + "loss": 0.0384, + "num_tokens": 82254311.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.968044638633728, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009167979590731502, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05476386777156224, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1167.5625, + "completions/mean_terminated_length": 1167.5625, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.4553638409602401, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1658157293907507, + "kl": 0.0162811279296875, + "learning_rate": 6.963591612210223e-07, + "loss": -0.037, + "num_tokens": 82304712.0, + "reward": 0.0, + "reward_std": 0.8207736611366272, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06421436722534012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12224954607289464, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1387.875, + "completions/mean_terminated_length": 1336.9091796875, + "completions/min_length": 1216.0, + "completions/min_terminated_length": 1216.0, + "epoch": 0.45561390347586894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.71155353369562, + "kl": 0.0126800537109375, + "learning_rate": 6.959876540518991e-07, + "loss": 0.0183, + "num_tokens": 82358174.0, + "reward": 0.0, + "reward_std": 0.4597734212875366, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10724303915387337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19482584690965446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19734346820820914, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1312.625, + "completions/mean_terminated_length": 1250.166748046875, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.45586396599149787, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1259597767257334, + "kl": 0.0166473388671875, + "learning_rate": 6.956160356450364e-07, + "loss": -0.0286, + "num_tokens": 82405632.0, + "reward": 0.0, + "reward_std": 0.6679286956787109, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09562542840201543, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1315280000219106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1333.5625, + "completions/mean_terminated_length": 1295.1539306640625, + "completions/min_length": 1162.0, + "completions/min_terminated_length": 1162.0, + "epoch": 0.4561140285071268, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5166750264777815, + "kl": 0.0108184814453125, + "learning_rate": 6.952443062835954e-07, + "loss": 0.0202, + "num_tokens": 82444593.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0097635984420776, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0824531244519275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03684521918502093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1060.9375, + "completions/mean_terminated_length": 1060.9375, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.4563640910227557, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.830732145524097, + "kl": 0.0129547119140625, + "learning_rate": 6.948724662508214e-07, + "loss": -0.0322, + "num_tokens": 82480536.0, + "reward": 0.0, + "reward_std": 0.8124505281448364, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015338984229879286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03360756299492685, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 990.9375, + "completions/mean_terminated_length": 990.9375, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.4566141535383846, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8513147489166566, + "kl": 0.0145721435546875, + "learning_rate": 6.945005158300438e-07, + "loss": 0.0032, + "num_tokens": 82514103.0, + "reward": 0.0, + "reward_std": 0.9960329532623291, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04540406116600847, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09436945132145678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 971.0625, + "completions/mean_terminated_length": 971.0625, + "completions/min_length": 524.0, + "completions/min_terminated_length": 524.0, + "epoch": 0.4568642160540135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.368200318893693, + "kl": 0.01171875, + "learning_rate": 6.941284553046769e-07, + "loss": -0.0135, + "num_tokens": 82557336.0, + "reward": 0.0, + "reward_std": 0.9977255463600159, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010722299504217069, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09330477559934888, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115676, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1274.875, + "completions/mean_terminated_length": 1259.86669921875, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.45711427856964243, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8574284532317344, + "kl": 0.012908935546875, + "learning_rate": 6.937562849582184e-07, + "loss": -0.0009, + "num_tokens": 82616214.0, + "reward": 0.0, + "reward_std": 0.8704149723052979, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0850594943204607, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14907198994293339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1138.0625, + "completions/mean_terminated_length": 973.5454711914062, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.4573643410852713, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.437617765081333, + "kl": 0.019317626953125, + "learning_rate": 6.933840050742494e-07, + "loss": -0.0109, + "num_tokens": 82662703.0, + "reward": 0.0, + "reward_std": 1.0448416471481323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029058579767837586, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13053124660818144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1036.75, + "completions/mean_terminated_length": 1036.75, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.4576144036009002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5743547446318806, + "kl": 0.0259552001953125, + "learning_rate": 6.930116159364351e-07, + "loss": 0.0057, + "num_tokens": 82704499.0, + "reward": 0.0, + "reward_std": 0.9025289416313171, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16100855821763016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2565515967337793, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1348.5625, + "completions/mean_terminated_length": 1153.857177734375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.45786446611652915, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9628574770398965, + "kl": 0.013702392578125, + "learning_rate": 6.926391178285236e-07, + "loss": -0.0172, + "num_tokens": 82765620.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0314022302627563, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.050153758598882395, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06498519115013517, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1175.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 1033.25, + "completions/mean_terminated_length": 1033.25, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.458114528632158, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8888649831081823, + "kl": 0.0177459716796875, + "learning_rate": 6.922665110343461e-07, + "loss": -0.0187, + "num_tokens": 82809536.0, + "reward": 0.0, + "reward_std": 0.7812528014183044, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06915200509562117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051419356331221196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15389991938004774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1225.8125, + "completions/mean_terminated_length": 1186.6429443359375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.45836459114778694, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9402665726107884, + "kl": 0.013946533203125, + "learning_rate": 6.918937958378164e-07, + "loss": -0.0025, + "num_tokens": 82857325.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0259133577346802, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011231261614965108, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09716848063692403, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 999.4375, + "completions/mean_terminated_length": 999.4375, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.45861465366341586, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0989169394902087, + "kl": 0.01239013671875, + "learning_rate": 6.915209725229314e-07, + "loss": -0.077, + "num_tokens": 82896924.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.022628903388977, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0600167277913412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06939960333862358, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1025.6875, + "completions/mean_terminated_length": 1025.6875, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.4588647161790448, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.228210909657755, + "kl": 0.012969970703125, + "learning_rate": 6.911480413737698e-07, + "loss": -0.0845, + "num_tokens": 82935015.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5119550824165344, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.061661498127497126, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07698606972614011, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1473.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1073.25, + "completions/mean_terminated_length": 1073.25, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.45911477869467365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.404151729421034, + "kl": 0.015411376953125, + "learning_rate": 6.907750026744927e-07, + "loss": -0.0206, + "num_tokens": 82987955.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0061125755310059, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02871448483439275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08028431132680781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 977.6875, + "completions/mean_terminated_length": 977.6875, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.4593648412103026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0302082068429708, + "kl": 0.01264190673828125, + "learning_rate": 6.904018567093435e-07, + "loss": -0.0214, + "num_tokens": 83020718.0, + "reward": 0.0, + "reward_std": 0.8798761367797852, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09262552280283803, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13064356903568808, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15293426329272616, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1222.75, + "completions/mean_terminated_length": 1204.2667236328125, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.4596149037259315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2237613171205233, + "kl": 0.015289306640625, + "learning_rate": 6.900286037626467e-07, + "loss": 0.0114, + "num_tokens": 83066594.0, + "reward": 0.0, + "reward_std": 0.7387148141860962, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0316166225070472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048146613630875724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1136.5, + "completions/mean_terminated_length": 1084.571533203125, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.45986496624156037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2433559023958574, + "kl": 0.01239013671875, + "learning_rate": 6.896552441188089e-07, + "loss": -0.0078, + "num_tokens": 83110522.0, + "reward": 0.0, + "reward_std": 0.8725123405456543, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019891341202272712, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11410652031283763, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1022.6875, + "completions/mean_terminated_length": 1022.6875, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.4601150287571893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.032687896395839, + "kl": 0.013824462890625, + "learning_rate": 6.892817780623176e-07, + "loss": -0.0568, + "num_tokens": 83149861.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0448403358459473, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016500985972915844, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10793586936575628, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1031.9375, + "completions/mean_terminated_length": 1031.9375, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.4603650912728182, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9658534023727285, + "kl": 0.0146026611328125, + "learning_rate": 6.889082058777415e-07, + "loss": -0.0094, + "num_tokens": 83184644.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0671181678771973, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045287337921535775, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0757621835587532, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1061.875, + "completions/mean_terminated_length": 960.769287109375, + "completions/min_length": 433.0, + "completions/min_terminated_length": 433.0, + "epoch": 0.4606151537884471, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.0644084177572735, + "kl": 0.025146484375, + "learning_rate": 6.885345278497302e-07, + "loss": 0.0421, + "num_tokens": 83232194.0, + "reward": 0.0, + "reward_std": 0.9290933609008789, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003079044104724673, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03722256559033691, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1266.6875, + "completions/mean_terminated_length": 1266.6875, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.460865216304076, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1479680627332662, + "kl": 0.005107879638671875, + "learning_rate": 6.88160744263014e-07, + "loss": -0.0106, + "num_tokens": 83277869.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9287983179092407, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01780458746218854, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07844349964093861, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1234.5, + "completions/mean_terminated_length": 1075.2000732421875, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.46111527881970493, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7839205746139797, + "kl": 0.0098419189453125, + "learning_rate": 6.877868554024034e-07, + "loss": -0.0264, + "num_tokens": 83325213.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0659749507904053, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05528262248719654, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24447824003346008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1059.9375, + "completions/mean_terminated_length": 1059.9375, + "completions/min_length": 551.0, + "completions/min_terminated_length": 551.0, + "epoch": 0.46136534133533386, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5628619906974577, + "kl": 0.01849365234375, + "learning_rate": 6.874128615527896e-07, + "loss": -0.0553, + "num_tokens": 83366772.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0636755228042603, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06216897267737967, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07410262701311715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1341.625, + "completions/mean_terminated_length": 1246.5999755859375, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.4616154038509627, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6666708097461305, + "kl": 0.0153350830078125, + "learning_rate": 6.870387629991432e-07, + "loss": -0.0183, + "num_tokens": 83425894.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0647468566894531, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05177356753271487, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09461828986477723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518177, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1092.6875, + "completions/mean_terminated_length": 1092.6875, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.46186546636659165, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6979280133137364, + "kl": 0.016143798828125, + "learning_rate": 6.86664560026515e-07, + "loss": 0.0329, + "num_tokens": 83463817.0, + "reward": 0.0, + "reward_std": 0.818084716796875, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026275904756948642, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08616822698729962, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1144.4375, + "completions/mean_terminated_length": 1093.6429443359375, + "completions/min_length": 714.0, + "completions/min_terminated_length": 714.0, + "epoch": 0.46211552888222057, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.725849642204346, + "kl": 0.0160369873046875, + "learning_rate": 6.86290252920035e-07, + "loss": -0.0196, + "num_tokens": 83511648.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9439548850059509, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004773744118550791, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08793436285181201, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11021863793455329, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1257.0, + "completions/max_terminated_length": 1257.0, + "completions/mean_length": 1177.8125, + "completions/mean_terminated_length": 1177.8125, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.46236559139784944, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.371185966699895, + "kl": 0.00732421875, + "learning_rate": 6.859158419649133e-07, + "loss": 0.0234, + "num_tokens": 83551365.0, + "reward": 0.0, + "reward_std": 0.7918788194656372, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009690211071354758, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024744903389538494, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1190.0, + "completions/max_terminated_length": 1190.0, + "completions/mean_length": 944.75, + "completions/mean_terminated_length": 944.75, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.46261565391347836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4535330688906023, + "kl": 0.013214111328125, + "learning_rate": 6.855413274464379e-07, + "loss": -0.0237, + "num_tokens": 83584017.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9044044017791748, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014628944142300017, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.040536455882283585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1195.0, + "completions/mean_terminated_length": 1151.4285888671875, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.4628657164291073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.41619330663218, + "kl": 0.00836944580078125, + "learning_rate": 6.851667096499768e-07, + "loss": -0.046, + "num_tokens": 83619105.0, + "reward": 0.0, + "reward_std": 0.6548577547073364, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14674974107711472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14936259761979365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1055.0, + "completions/max_terminated_length": 1055.0, + "completions/mean_length": 961.9375, + "completions/mean_terminated_length": 961.9375, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.4631157789447362, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7759836376838716, + "kl": 0.008697509765625, + "learning_rate": 6.847919888609764e-07, + "loss": -0.0008, + "num_tokens": 83655400.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7785439491271973, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02664784667108073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13495020850641687, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1193.5625, + "completions/mean_terminated_length": 887.125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.4633658414603651, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.0707046293457205, + "kl": 0.01776123046875, + "learning_rate": 6.844171653649613e-07, + "loss": -0.0231, + "num_tokens": 83704025.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7044391632080078, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016443040639594668, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01754189462143652, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1251.9375, + "completions/mean_terminated_length": 1169.25, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.463615903975994, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2891160558561734, + "kl": 0.0159454345703125, + "learning_rate": 6.840422394475346e-07, + "loss": -0.0266, + "num_tokens": 83754160.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.992673933506012, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 955.0, + "completions/mean_length": 1169.6875, + "completions/mean_terminated_length": 839.375, + "completions/min_length": 617.0, + "completions/min_terminated_length": 617.0, + "epoch": 0.4638659664916229, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1406880449861596, + "kl": 0.0127716064453125, + "learning_rate": 6.836672113943774e-07, + "loss": 0.0363, + "num_tokens": 83803291.0, + "reward": 0.0, + "reward_std": 1.0250167846679688, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14230976725261926, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0874199388283981, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1231.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 999.25, + "completions/mean_terminated_length": 999.25, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.4641160290072518, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.437755171143294, + "kl": 0.0160064697265625, + "learning_rate": 6.832920814912486e-07, + "loss": 0.0067, + "num_tokens": 83835183.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0335426330566406, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022269980812583173, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05668275806659453, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 1282.5625, + "completions/mean_terminated_length": 1065.125, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.4643660915228807, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7068882325100163, + "kl": 0.0133514404296875, + "learning_rate": 6.829168500239846e-07, + "loss": -0.0018, + "num_tokens": 83886872.0, + "reward": 0.0, + "reward_std": 0.8782742023468018, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05208720319280023, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05379365857628684, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1548595540529595, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1043.625, + "completions/mean_terminated_length": 978.4285888671875, + "completions/min_length": 481.0, + "completions/min_terminated_length": 481.0, + "epoch": 0.46461615403850964, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8238752274575134, + "kl": 0.0159912109375, + "learning_rate": 6.825415172784996e-07, + "loss": -0.1222, + "num_tokens": 83940114.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6436352729797363, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03389209865876737, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039298575184603834, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1009.5, + "completions/mean_terminated_length": 1009.5, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.4648662165541385, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7845183070924735, + "kl": 0.0163726806640625, + "learning_rate": 6.821660835407844e-07, + "loss": 0.0002, + "num_tokens": 83972746.0, + "reward": 0.0, + "reward_std": 0.8896753191947937, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030585530626912154, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060136754844700185, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1410.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1135.0, + "completions/mean_terminated_length": 1135.0, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.46511627906976744, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2366241872933967, + "kl": 0.0134735107421875, + "learning_rate": 6.81790549096907e-07, + "loss": -0.0041, + "num_tokens": 84016562.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7251626253128052, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.36068859329844183, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3904876056954611, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792296, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1306.6875, + "completions/mean_terminated_length": 1242.25, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.46536634158539636, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.081705099517614, + "kl": 0.014678955078125, + "learning_rate": 6.814149142330125e-07, + "loss": 0.0156, + "num_tokens": 84057717.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9982789754867554, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04123169877046761, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04159620624993366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1189.1875, + "completions/mean_terminated_length": 1117.4615478515625, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.4656164041010253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.308699575656154, + "kl": 0.01404571533203125, + "learning_rate": 6.810391792353217e-07, + "loss": -0.0272, + "num_tokens": 84109688.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7732906341552734, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008666699644526309, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06294837995886422, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1067.625, + "completions/mean_terminated_length": 1067.625, + "completions/min_length": 415.0, + "completions/min_terminated_length": 415.0, + "epoch": 0.46586646661665415, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7451353253556725, + "kl": 0.018096923828125, + "learning_rate": 6.806633443901326e-07, + "loss": -0.0222, + "num_tokens": 84150882.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0186876058578491, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007883021449688008, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03242588312609541, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1224.25, + "completions/mean_terminated_length": 1160.615478515625, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.4661165291322831, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5652458530896767, + "kl": 0.0135040283203125, + "learning_rate": 6.802874099838187e-07, + "loss": -0.0179, + "num_tokens": 84200158.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9598199129104614, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06439648383078717, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09789424808221024, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1246.25, + "completions/mean_terminated_length": 1187.6923828125, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.466366591647912, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.097307480283226, + "kl": 0.013519287109375, + "learning_rate": 6.799113763028294e-07, + "loss": -0.0141, + "num_tokens": 84248842.0, + "reward": 0.0, + "reward_std": 1.0153568983078003, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10078114430743264, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11249396938048391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1285.1875, + "completions/mean_terminated_length": 1187.5455322265625, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.46661665416354087, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8376120327819168, + "kl": 0.0108184814453125, + "learning_rate": 6.795352436336901e-07, + "loss": 0.0595, + "num_tokens": 84296341.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8875278234481812, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04575783341109019, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11466158115220573, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0687184270936277, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 990.5, + "completions/mean_terminated_length": 990.5, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.4668667166791698, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1799001126202646, + "kl": 0.0123443603515625, + "learning_rate": 6.791590122630013e-07, + "loss": -0.0239, + "num_tokens": 84339933.0, + "reward": 0.0, + "reward_std": 0.8213618397712708, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015282149404356434, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22959015441100467, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 988.375, + "completions/mean_terminated_length": 954.2667236328125, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.4671167791947987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0078472526892988, + "kl": 0.0125274658203125, + "learning_rate": 6.787826824774385e-07, + "loss": -0.0542, + "num_tokens": 84368451.0, + "reward": 0.0, + "reward_std": 1.0029993057250977, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020478965936279826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11564753356270147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 984.8125, + "completions/mean_terminated_length": 984.8125, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.4673668417104276, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4791815513922066, + "kl": 0.0170135498046875, + "learning_rate": 6.784062545637529e-07, + "loss": 0.0096, + "num_tokens": 84411176.0, + "reward": 0.0, + "reward_std": 0.8106040954589844, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005932266193526273, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051653062401242025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1212.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 1092.5625, + "completions/mean_terminated_length": 1092.5625, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.4676169042260565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0798004703888644, + "kl": 0.011810302734375, + "learning_rate": 6.780297288087701e-07, + "loss": -0.0053, + "num_tokens": 84465817.0, + "reward": 0.0, + "reward_std": 0.8956692218780518, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016527467205764035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19069801849407234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1132.125, + "completions/mean_terminated_length": 1047.2308349609375, + "completions/min_length": 693.0, + "completions/min_terminated_length": 693.0, + "epoch": 0.46786696674168543, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.596546602750799, + "kl": 0.0111541748046875, + "learning_rate": 6.776531054993898e-07, + "loss": 0.0798, + "num_tokens": 84502339.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9847208261489868, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08090233098082288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.091344147888233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1049.0, + "completions/mean_terminated_length": 1018.9334106445312, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.46811702925731435, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.260547688510743, + "kl": 0.0122528076171875, + "learning_rate": 6.772763849225868e-07, + "loss": -0.0011, + "num_tokens": 84532947.0, + "reward": 0.0, + "reward_std": 0.891823410987854, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004706014144111362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03426228419090707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1232.6875, + "completions/mean_terminated_length": 1171.0, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.4683670917729432, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3600521630593123, + "kl": 0.0139312744140625, + "learning_rate": 6.768995673654094e-07, + "loss": -0.0364, + "num_tokens": 84583622.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0459738969802856, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0036310154189729536, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045492990153671205, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1264.9375, + "completions/mean_terminated_length": 1186.5833740234375, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.46861715428857215, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.05390518226303, + "kl": 0.0145416259765625, + "learning_rate": 6.765226531149802e-07, + "loss": -0.0243, + "num_tokens": 84640357.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9955357313156128, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0981959140433387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07898310050115967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952499, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1086.375, + "completions/mean_terminated_length": 1027.2857666015625, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.46886721680420107, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.231333435219154, + "kl": 0.03118896484375, + "learning_rate": 6.761456424584953e-07, + "loss": -0.0046, + "num_tokens": 84693243.0, + "reward": 0.0, + "reward_std": 0.8411573767662048, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023188247078980904, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05609458626427101, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1338.6875, + "completions/mean_terminated_length": 1241.9000244140625, + "completions/min_length": 1083.0, + "completions/min_terminated_length": 1083.0, + "epoch": 0.46911727931982994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3864594579335274, + "kl": 0.0092926025390625, + "learning_rate": 6.757685356832242e-07, + "loss": -0.0391, + "num_tokens": 84731334.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.43781769275665283, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09394403264323559, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19930802524801294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1094.0, + "completions/max_terminated_length": 1094.0, + "completions/mean_length": 960.375, + "completions/mean_terminated_length": 960.375, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.46936734183545886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.610111717912981, + "kl": 0.00927734375, + "learning_rate": 6.7539133307651e-07, + "loss": -0.0116, + "num_tokens": 84762020.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6590899229049683, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12984001761493558, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10866440503855579, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1247.1875, + "completions/mean_terminated_length": 1247.1875, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.4696174043510878, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8912752780433855, + "kl": 0.015350341796875, + "learning_rate": 6.750140349257681e-07, + "loss": -0.0098, + "num_tokens": 84805631.0, + "reward": 0.0, + "reward_std": 0.523679792881012, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005854385865432446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03195934706278732, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0910840068085298, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1013.125, + "completions/mean_terminated_length": 943.5714721679688, + "completions/min_length": 692.0, + "completions/min_terminated_length": 692.0, + "epoch": 0.46986746686671665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.219627927094694, + "kl": 0.0121917724609375, + "learning_rate": 6.746366415184875e-07, + "loss": 0.077, + "num_tokens": 84844985.0, + "reward": 0.0, + "reward_std": 0.5874274969100952, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13686406357158193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10078634989371796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1113.0, + "completions/max_terminated_length": 1113.0, + "completions/mean_length": 835.6875, + "completions/mean_terminated_length": 835.6875, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.4701175293823456, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1498973299644732, + "kl": 0.01251983642578125, + "learning_rate": 6.742591531422295e-07, + "loss": 0.0056, + "num_tokens": 84878468.0, + "reward": 0.0, + "reward_std": 0.7608758211135864, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07377928151555223, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2036321048626687, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 1173.3125, + "completions/mean_terminated_length": 1126.6429443359375, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.4703675918979745, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.822601106472211, + "kl": 0.0131378173828125, + "learning_rate": 6.738815700846276e-07, + "loss": 0.0369, + "num_tokens": 84931337.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5588453412055969, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12781366966771857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20068435069928198, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1198.9375, + "completions/mean_terminated_length": 1178.86669921875, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.4706176544136034, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.849956334459045, + "kl": 0.0137939453125, + "learning_rate": 6.735038926333871e-07, + "loss": 0.0091, + "num_tokens": 84979192.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5622043609619141, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0379224917223121, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055237339074835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026001, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1129.9375, + "completions/mean_terminated_length": 1129.9375, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.4708677169292323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.584526986748381, + "kl": 0.020050048828125, + "learning_rate": 6.731261210762862e-07, + "loss": -0.0483, + "num_tokens": 85029687.0, + "reward": 0.0, + "reward_std": 0.6479766964912415, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01086311167922276, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11246117196690046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1134476547592341, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1398.9375, + "completions/mean_terminated_length": 1269.0, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.4711177794448612, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3342985556912286, + "kl": 0.013824462890625, + "learning_rate": 6.727482557011741e-07, + "loss": -0.0291, + "num_tokens": 85087694.0, + "reward": 0.0, + "reward_std": 0.507337212562561, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09758924662521723, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10608951099203826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1136.9375, + "completions/mean_terminated_length": 1112.7333984375, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.47136784196049014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6046286115296335, + "kl": 0.0157012939453125, + "learning_rate": 6.723702967959717e-07, + "loss": 0.0165, + "num_tokens": 85131053.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9220064878463745, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036543765105342706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06718511897278712, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1143.0, + "completions/max_terminated_length": 1143.0, + "completions/mean_length": 1033.5625, + "completions/mean_terminated_length": 1033.5625, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.471617904476119, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.126873189721066, + "kl": 0.0134124755859375, + "learning_rate": 6.719922446486709e-07, + "loss": 0.0278, + "num_tokens": 85173806.0, + "reward": 0.0, + "reward_std": 0.721783459186554, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.042370467236421336, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06243893413771245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 841.75, + "completions/mean_terminated_length": 797.86669921875, + "completions/min_length": 399.0, + "completions/min_terminated_length": 399.0, + "epoch": 0.47186796699174793, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.385232347776868, + "kl": 0.0172271728515625, + "learning_rate": 6.716140995473354e-07, + "loss": -0.0746, + "num_tokens": 85210714.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9111377596855164, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.036210207767595565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05913067286840874, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1299572579307862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1371.0625, + "completions/mean_terminated_length": 1205.2857666015625, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.47211802950737686, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9643017665298697, + "kl": 0.0117340087890625, + "learning_rate": 6.712358617800985e-07, + "loss": -0.0776, + "num_tokens": 85266819.0, + "reward": 0.0, + "reward_std": 0.9653382301330566, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09234065062037378, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10408440980056621, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1281.0625, + "completions/mean_terminated_length": 1208.0833740234375, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.4723680920230057, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7013703689639295, + "kl": 0.011688232421875, + "learning_rate": 6.708575316351653e-07, + "loss": -0.0088, + "num_tokens": 85310756.0, + "reward": 0.0, + "reward_std": 0.6903394460678101, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05869433158443118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08242853481494061, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1088.75, + "completions/mean_terminated_length": 1061.3333740234375, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.47261815453863465, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9002191071252645, + "kl": 0.0166473388671875, + "learning_rate": 6.704791094008107e-07, + "loss": -0.0382, + "num_tokens": 85348608.0, + "reward": 0.0, + "reward_std": 1.0033472776412964, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0004588709206999543, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04226266942317501, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1248.875, + "completions/mean_terminated_length": 1098.2000732421875, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.4728682170542636, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.394661396945037, + "kl": 0.008819580078125, + "learning_rate": 6.701005953653796e-07, + "loss": 0.0372, + "num_tokens": 85409918.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7433128356933594, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02827586354116152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04551550390939511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 966.9375, + "completions/mean_terminated_length": 890.7857666015625, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.4731182795698925, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.718240450910618, + "kl": 0.010467529296875, + "learning_rate": 6.697219898172874e-07, + "loss": 0.0145, + "num_tokens": 85451445.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.45938563346862793, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00846828192083902, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030645747633138917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13817594795257457, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1320.0625, + "completions/mean_terminated_length": 1140.125, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.47336834208552137, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8732508299089465, + "kl": 0.014617919921875, + "learning_rate": 6.69343293045019e-07, + "loss": -0.0557, + "num_tokens": 85507014.0, + "reward": 0.0, + "reward_std": 0.8374285101890564, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18364088155140024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06779809847765349, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1165.625, + "completions/mean_terminated_length": 1165.625, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.4736184046011503, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.910010649146053, + "kl": 0.01800537109375, + "learning_rate": 6.689645053371285e-07, + "loss": -0.0296, + "num_tokens": 85551960.0, + "reward": 0.0, + "reward_std": 0.9631032943725586, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07032702738217428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.131551901954233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1322.25, + "completions/mean_terminated_length": 1144.5, + "completions/min_length": 730.0, + "completions/min_terminated_length": 730.0, + "epoch": 0.4738684671167792, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.086504905778682, + "kl": 0.0092620849609375, + "learning_rate": 6.685856269822395e-07, + "loss": -0.0241, + "num_tokens": 85615156.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0114206075668335, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04788631994140999, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07389677474841409, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1231.3125, + "completions/mean_terminated_length": 1022.3333129882812, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.4741185296324081, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1551063215404613, + "kl": 0.0142822265625, + "learning_rate": 6.682066582690451e-07, + "loss": -0.0156, + "num_tokens": 85659921.0, + "reward": 0.0, + "reward_std": 1.0234266519546509, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16306932418226372, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0885898676088075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202952, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 881.25, + "completions/mean_terminated_length": 881.25, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.474368592148037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7656127144296865, + "kl": 0.01898193359375, + "learning_rate": 6.678275994863065e-07, + "loss": -0.0255, + "num_tokens": 85698309.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0139950513839722, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049074207335977206, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05212483909444117, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1066.0, + "completions/mean_terminated_length": 1066.0, + "completions/min_length": 549.0, + "completions/min_terminated_length": 549.0, + "epoch": 0.47461865466366593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1373972519070024, + "kl": 0.01690673828125, + "learning_rate": 6.674484509228541e-07, + "loss": -0.041, + "num_tokens": 85752125.0, + "reward": 0.0, + "reward_std": 0.48677903413772583, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07382111109997128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.164739426146542, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 1067.0625, + "completions/mean_terminated_length": 1067.0625, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.4748687171792948, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6601113866120984, + "kl": 0.01556396484375, + "learning_rate": 6.670692128675864e-07, + "loss": -0.0241, + "num_tokens": 85796606.0, + "reward": 0.0, + "reward_std": 0.7382583618164062, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10998717911114568, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0985613155568639, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 1113.125, + "completions/mean_terminated_length": 1087.3333740234375, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.4751187796949237, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8358423880164643, + "kl": 0.0106353759765625, + "learning_rate": 6.6668988560947e-07, + "loss": -0.0302, + "num_tokens": 85837136.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5034657120704651, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07615919604734966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09454315495848817, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1192.4375, + "completions/mean_terminated_length": 1052.6363525390625, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.47536884221055264, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1452655291829354, + "kl": 0.010223388671875, + "learning_rate": 6.6631046943754e-07, + "loss": -0.0625, + "num_tokens": 85885183.0, + "reward": 0.0, + "reward_std": 0.7404085397720337, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07537805637773076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1385461062160581, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1248.3125, + "completions/mean_terminated_length": 1190.2308349609375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.47561890472618157, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2338950732321914, + "kl": 0.0136260986328125, + "learning_rate": 6.659309646408988e-07, + "loss": -0.0161, + "num_tokens": 85929524.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9128320217132568, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03282378502973593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05076372207090238, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752093, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1058.75, + "completions/mean_terminated_length": 1029.3333740234375, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.47586896724181044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5424860471945054, + "kl": 0.0143585205078125, + "learning_rate": 6.655513715087165e-07, + "loss": 0.057, + "num_tokens": 85976576.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.48822712898254395, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12362802714947313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1444652950781308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1204.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1036.9375, + "completions/mean_terminated_length": 1036.9375, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.47611902975743936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6712529721218803, + "kl": 0.01837158203125, + "learning_rate": 6.651716903302301e-07, + "loss": -0.0462, + "num_tokens": 86026239.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0606942176818848, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19431230677867498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2588189807603952, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1133.3125, + "completions/mean_terminated_length": 1011.0833740234375, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.4763690922730683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3576202726933317, + "kl": 0.015533447265625, + "learning_rate": 6.647919213947447e-07, + "loss": -0.0641, + "num_tokens": 86067492.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7513043880462646, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013051272051461393, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0312157796684796, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1004.25, + "completions/mean_terminated_length": 1004.25, + "completions/min_length": 624.0, + "completions/min_terminated_length": 624.0, + "epoch": 0.47661915478869715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.459437282992562, + "kl": 0.018524169921875, + "learning_rate": 6.644120649916309e-07, + "loss": -0.0075, + "num_tokens": 86107312.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0216128826141357, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11206021657562043, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22662199165199604, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1057.0, + "completions/mean_terminated_length": 1057.0, + "completions/min_length": 521.0, + "completions/min_terminated_length": 521.0, + "epoch": 0.4768692173043261, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0596997123517804, + "kl": 0.01291656494140625, + "learning_rate": 6.640321214103271e-07, + "loss": 0.0151, + "num_tokens": 86139200.0, + "reward": 0.0, + "reward_std": 0.9146584272384644, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0584655365593443, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08669089121891312, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 958.5, + "completions/mean_terminated_length": 881.1428833007812, + "completions/min_length": 582.0, + "completions/min_terminated_length": 582.0, + "epoch": 0.477119279819955, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6027005027463592, + "kl": 0.0171051025390625, + "learning_rate": 6.636520909403374e-07, + "loss": -0.0362, + "num_tokens": 86170400.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0474727153778076, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06965954483935002, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03924382681180245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1258.75, + "completions/mean_terminated_length": 1224.2857666015625, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.47736934233558387, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8245123417044025, + "kl": 0.0153961181640625, + "learning_rate": 6.632719738712324e-07, + "loss": 0.0208, + "num_tokens": 86223812.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8789528608322144, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014907962754595662, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03938192682975242, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1160.5, + "completions/mean_terminated_length": 1006.1818237304688, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.4776194048512128, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3985468481037096, + "kl": 0.0147705078125, + "learning_rate": 6.628917704926491e-07, + "loss": -0.0711, + "num_tokens": 86269396.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6039036512374878, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05184356677792204, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12688890877784711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 979.125, + "completions/mean_terminated_length": 979.125, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.4778694673668417, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7971929433532927, + "kl": 0.0162506103515625, + "learning_rate": 6.625114810942894e-07, + "loss": 0.0023, + "num_tokens": 86300054.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0679841041564941, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07884959650033302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07111722467132862, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 949.6875, + "completions/mean_terminated_length": 949.6875, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.47811952988247064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1843706370818854, + "kl": 0.013092041015625, + "learning_rate": 6.621311059659214e-07, + "loss": 0.0104, + "num_tokens": 86341697.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0394268035888672, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12741399527114455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16629177131924863, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 949.5625, + "completions/mean_terminated_length": 949.5625, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.4783695923980995, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.073098116170339, + "kl": 0.0158843994140625, + "learning_rate": 6.617506453973781e-07, + "loss": -0.0053, + "num_tokens": 86387394.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9476511478424072, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017755884045153724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058008013636693556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1168.25, + "completions/mean_terminated_length": 1168.25, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.47861965491372843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1075584778933507, + "kl": 0.019989013671875, + "learning_rate": 6.61370099678558e-07, + "loss": 0.0034, + "num_tokens": 86437998.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0403326749801636, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09265043491302337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0811878953160365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1174.0, + "completions/max_terminated_length": 1174.0, + "completions/mean_length": 908.9375, + "completions/mean_terminated_length": 908.9375, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.47886971742935736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4558621482144662, + "kl": 0.01275634765625, + "learning_rate": 6.60989469099424e-07, + "loss": 0.0031, + "num_tokens": 86467973.0, + "reward": 0.0, + "reward_std": 0.8515634536743164, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023816436278333908, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04434163264913807, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1200.0, + "completions/max_terminated_length": 1200.0, + "completions/mean_length": 888.5, + "completions/mean_terminated_length": 888.5, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.4791197799449862, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.595904135548974, + "kl": 0.01708984375, + "learning_rate": 6.606087539500042e-07, + "loss": -0.015, + "num_tokens": 86502813.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0023553371429443, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006931449098706834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04363829847511768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1132.3125, + "completions/mean_terminated_length": 1132.3125, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.47936984246061515, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.442197935571225, + "kl": 0.0161590576171875, + "learning_rate": 6.60227954520391e-07, + "loss": -0.0206, + "num_tokens": 86539954.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0680949687957764, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1343.0, + "completions/mean_terminated_length": 1186.0, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.47961990497624407, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3788102479971447, + "kl": 0.0125732421875, + "learning_rate": 6.598470711007407e-07, + "loss": -0.0271, + "num_tokens": 86589554.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0416228771209717, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02468830760096622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023363700997809145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1098.1875, + "completions/mean_terminated_length": 1098.1875, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.479869967491873, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9078423063319003, + "kl": 0.01220703125, + "learning_rate": 6.594661039812738e-07, + "loss": -0.0279, + "num_tokens": 86630173.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0249204635620117, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00025285151722808166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09976635610073933, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857664, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1167.125, + "completions/mean_terminated_length": 1167.125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.48012003000750186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0114275025821104, + "kl": 0.01727294921875, + "learning_rate": 6.590850534522746e-07, + "loss": 0.027, + "num_tokens": 86667559.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0341798067092896, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07629825781117985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08309529727371798, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1272.375, + "completions/mean_terminated_length": 1257.2000732421875, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.4803700925231308, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.120158461609348, + "kl": 0.007785797119140625, + "learning_rate": 6.58703919804091e-07, + "loss": 0.0335, + "num_tokens": 86712781.0, + "reward": 0.0, + "reward_std": 0.9448849558830261, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09893792816568935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07541002333024091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258099, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1251.0, + "completions/max_terminated_length": 1251.0, + "completions/mean_length": 1031.0625, + "completions/mean_terminated_length": 1031.0625, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.4806201550387597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.210903779976485, + "kl": 0.01593017578125, + "learning_rate": 6.583227033271342e-07, + "loss": 0.0039, + "num_tokens": 86758030.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.902991771697998, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00214569158011771, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.00858276632047084, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1306.0, + "completions/max_terminated_length": 1306.0, + "completions/mean_length": 1030.3125, + "completions/mean_terminated_length": 1030.3125, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.4808702175543886, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6490668932166725, + "kl": 0.0117645263671875, + "learning_rate": 6.579414043118783e-07, + "loss": -0.012, + "num_tokens": 86800443.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.29682865738868713, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19781212396263848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4699062469074241, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1128.6875, + "completions/mean_terminated_length": 1103.933349609375, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.4811202800700175, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.312982359794677, + "kl": 0.0160064697265625, + "learning_rate": 6.575600230488606e-07, + "loss": -0.0381, + "num_tokens": 86833094.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0014598369598389, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025380013875671778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06711906491049055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1332.25, + "completions/mean_terminated_length": 1276.3333740234375, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.4813703425856464, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.111162663316215, + "kl": 0.0137786865234375, + "learning_rate": 6.57178559828681e-07, + "loss": 0.0267, + "num_tokens": 86879922.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0396788120269775, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08792076731535677, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.036237980598093494, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1232.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1087.125, + "completions/mean_terminated_length": 1087.125, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.4816204051012753, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.210202838404737, + "kl": 0.0186767578125, + "learning_rate": 6.567970149420017e-07, + "loss": 0.0119, + "num_tokens": 86923836.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0455186367034912, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016970647925466247, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0936715283504821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1070.1875, + "completions/mean_terminated_length": 1070.1875, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.4818704676169042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.808423737159861, + "kl": 0.01189422607421875, + "learning_rate": 6.564153886795472e-07, + "loss": 0.0729, + "num_tokens": 86964303.0, + "reward": 0.0, + "reward_std": 0.9947397708892822, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1867322436822907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18429529914560083, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14707015206910487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1199.0, + "completions/mean_terminated_length": 1178.933349609375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.48212053013253314, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.233081904633079, + "kl": 0.018341064453125, + "learning_rate": 6.560336813321041e-07, + "loss": -0.0271, + "num_tokens": 87017039.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0307118892669678, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15166307983908242, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1363585193987149, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1403.9375, + "completions/mean_terminated_length": 1307.875, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.48237059264816207, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6668869476177544, + "kl": 0.00948333740234375, + "learning_rate": 6.556518931905208e-07, + "loss": 0.011, + "num_tokens": 87064566.0, + "reward": 0.0, + "reward_std": 0.7825750708580017, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01609861314771739, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027862362603631577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 961.0, + "completions/mean_terminated_length": 925.0667114257812, + "completions/min_length": 304.0, + "completions/min_terminated_length": 304.0, + "epoch": 0.48262065516379093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.13272318279056, + "kl": 0.01312255859375, + "learning_rate": 6.552700245457068e-07, + "loss": -0.0073, + "num_tokens": 87098374.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.04688560962677, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005768977501516697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04366968578358549, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1274.3125, + "completions/mean_terminated_length": 1199.0833740234375, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.48287071767941986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0159871706732613, + "kl": 0.0148468017578125, + "learning_rate": 6.548880756886338e-07, + "loss": -0.0292, + "num_tokens": 87149915.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6966802477836609, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008395002042053639, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07019846610764013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1200.5, + "completions/mean_terminated_length": 1100.666748046875, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.4831207801950488, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0563058005848385, + "kl": 0.01129150390625, + "learning_rate": 6.545060469103338e-07, + "loss": -0.0296, + "num_tokens": 87201435.0, + "reward": 3.725290298461914e-08, + "reward_std": 0.9147650599479675, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011950478232177039, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05277426820464352, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752093, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1371.75, + "completions/mean_terminated_length": 1089.5999755859375, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.48337084271067765, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.571389136816498, + "kl": 0.0129547119140625, + "learning_rate": 6.541239385019003e-07, + "loss": -0.0731, + "num_tokens": 87257671.0, + "reward": 0.0, + "reward_std": 0.5700106024742126, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03472028267464706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08187605652475487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1332.375, + "completions/mean_terminated_length": 1321.2000732421875, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.4836209052263066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.854517046755461, + "kl": 0.0125732421875, + "learning_rate": 6.537417507544868e-07, + "loss": 0.0093, + "num_tokens": 87307501.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0533145666122437, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12464125605722441, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10093169390806661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572018, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1136.6875, + "completions/mean_terminated_length": 1136.6875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.4838709677419355, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.893915102954693, + "kl": 0.01080322265625, + "learning_rate": 6.533594839593081e-07, + "loss": -0.0114, + "num_tokens": 87362736.0, + "reward": 0.0, + "reward_std": 0.9366875290870667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005311409529845426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026139319975203495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.043673875571185676, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1246.3125, + "completions/mean_terminated_length": 1161.75, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.48412103025756437, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7515688022794964, + "kl": 0.0187225341796875, + "learning_rate": 6.529771384076383e-07, + "loss": -0.0048, + "num_tokens": 87418293.0, + "reward": 2.8870999813079834e-08, + "reward_std": 1.0377421379089355, + "rewards/wordcountpos_reward_GEOBench/mean": 2.8870999813079834e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10017681786596717, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12393108333963007, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857659, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1176.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 910.6875, + "completions/mean_terminated_length": 910.6875, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.4843710927731933, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.625956883060841, + "kl": 0.0075531005859375, + "learning_rate": 6.525947143908125e-07, + "loss": 0.0148, + "num_tokens": 87445392.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9767613410949707, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08950806904111665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1562469993640405, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1120.9375, + "completions/mean_terminated_length": 1120.9375, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.4846211552888222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.266028292535272, + "kl": 0.0153961181640625, + "learning_rate": 6.52212212200225e-07, + "loss": 0.0219, + "num_tokens": 87497895.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9725654721260071, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014112822875688045, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05706542342911011, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202957, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1082.5, + "completions/mean_terminated_length": 1022.857177734375, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.48487121780445114, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6488450913224764, + "kl": 0.00765228271484375, + "learning_rate": 6.518296321273294e-07, + "loss": 0.0336, + "num_tokens": 87542831.0, + "reward": 0.0, + "reward_std": 0.8452266454696655, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04369720299836508, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06912107022259222, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12583057392117916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1088.8125, + "completions/mean_terminated_length": 1088.8125, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.48512128032008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7189452432672665, + "kl": 0.0160369873046875, + "learning_rate": 6.514469744636392e-07, + "loss": -0.0193, + "num_tokens": 87581796.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0289440155029297, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0037709259901096944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.018607392619953353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1089.0625, + "completions/mean_terminated_length": 1030.357177734375, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.48537134283570893, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.174975290065829, + "kl": 0.0139617919921875, + "learning_rate": 6.510642395007269e-07, + "loss": 0.0046, + "num_tokens": 87618197.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9404416084289551, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005950687865667246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0711923700607657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1103.25, + "completions/mean_terminated_length": 1103.25, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.48562140535133785, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.905104137999387, + "kl": 0.0165557861328125, + "learning_rate": 6.506814275302238e-07, + "loss": -0.0276, + "num_tokens": 87659441.0, + "reward": 0.0, + "reward_std": 0.9372748136520386, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015077162405106842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05625592369533362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1514742369000235, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1180.4375, + "completions/mean_terminated_length": 1073.916748046875, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.4858714678669667, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.747272186069989, + "kl": 0.01300048828125, + "learning_rate": 6.502985388438198e-07, + "loss": -0.0553, + "num_tokens": 87703856.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.48899126052856445, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03258327865716199, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04383821674297192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1147.1875, + "completions/mean_terminated_length": 1096.7857666015625, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.48612153038259565, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1991467867374417, + "kl": 0.0177764892578125, + "learning_rate": 6.499155737332634e-07, + "loss": -0.1027, + "num_tokens": 87743931.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0097538232803345, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00845990595216653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039336507904032196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 993.25, + "completions/mean_terminated_length": 993.25, + "completions/min_length": 704.0, + "completions/min_terminated_length": 704.0, + "epoch": 0.48637159289822457, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.396749147065032, + "kl": 0.0137939453125, + "learning_rate": 6.495325324903613e-07, + "loss": -0.0083, + "num_tokens": 87773167.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0334279537200928, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010824084485107075, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03923789726320981, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1303.4375, + "completions/mean_terminated_length": 1214.0909423828125, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.48662165541385344, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1459111899972543, + "kl": 0.0165863037109375, + "learning_rate": 6.491494154069782e-07, + "loss": 0.0011, + "num_tokens": 87826822.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5031982064247131, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02856466498848248, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08583815597117335, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1200.75, + "completions/mean_terminated_length": 1180.800048828125, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.48687171792948236, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2987565159555507, + "kl": 0.017425537109375, + "learning_rate": 6.487662227750367e-07, + "loss": -0.0038, + "num_tokens": 87879282.0, + "reward": 0.0, + "reward_std": 0.9475072622299194, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20076123859554182, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08894834120219308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1036.75, + "completions/mean_terminated_length": 970.5714721679688, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.4871217804451113, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9150541857562895, + "kl": 0.0170135498046875, + "learning_rate": 6.483829548865165e-07, + "loss": 0.0247, + "num_tokens": 87932966.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.906354546546936, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18320243069641537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17624282186144116, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891874, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 970.3125, + "completions/mean_terminated_length": 970.3125, + "completions/min_length": 567.0, + "completions/min_terminated_length": 567.0, + "epoch": 0.4873718429607402, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4867483489325197, + "kl": 0.0178070068359375, + "learning_rate": 6.479996120334554e-07, + "loss": -0.056, + "num_tokens": 87964843.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0185256004333496, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007894643071901488, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05825425147321151, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1172.5625, + "completions/mean_terminated_length": 1125.7857666015625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.4876219054763691, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7833049631210813, + "kl": 0.0147705078125, + "learning_rate": 6.476161945079476e-07, + "loss": 0.0131, + "num_tokens": 88002884.0, + "reward": 0.0, + "reward_std": 0.8976900577545166, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10214767298124958, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10597432833699004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13817594795257457, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1158.25, + "completions/mean_terminated_length": 1158.25, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.487871967991998, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6588645736154053, + "kl": 0.011871337890625, + "learning_rate": 6.472327026021446e-07, + "loss": -0.0248, + "num_tokens": 88050448.0, + "reward": 0.0, + "reward_std": 0.8797503709793091, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017572074693090186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08952195329852142, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1175.25, + "completions/mean_terminated_length": 1153.60009765625, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.4881220305076269, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.390198163761793, + "kl": 0.018524169921875, + "learning_rate": 6.468491366082546e-07, + "loss": -0.0272, + "num_tokens": 88088556.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6109719276428223, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04268424388671796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053506543191998275, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1363.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1173.0625, + "completions/mean_terminated_length": 1173.0625, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.4883720930232558, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.77906762281682, + "kl": 0.0093536376953125, + "learning_rate": 6.464654968185418e-07, + "loss": -0.035, + "num_tokens": 88124957.0, + "reward": 0.0, + "reward_std": 0.7116352319717407, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014833407761338454, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10477723986596917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1311.0625, + "completions/mean_terminated_length": 1197.7000732421875, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.4886221555388847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0651424546338393, + "kl": 0.01800537109375, + "learning_rate": 6.460817835253275e-07, + "loss": 0.0019, + "num_tokens": 88180750.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.044459342956543, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12962997152836522, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14834777849547726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1291.375, + "completions/mean_terminated_length": 1166.2000732421875, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.48887221805451364, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3090140705269158, + "kl": 0.017669677734375, + "learning_rate": 6.45697997020988e-07, + "loss": 0.0082, + "num_tokens": 88229124.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0121777057647705, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0031344628186671685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04479774114837731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1332.125, + "completions/mean_terminated_length": 1308.1429443359375, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.4891222805701425, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9494584823657748, + "kl": 0.00922393798828125, + "learning_rate": 6.453141375979562e-07, + "loss": -0.0344, + "num_tokens": 88266830.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0145604610443115, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023124472920932167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.029015856695327014, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09259629622222522, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1143.375, + "completions/mean_terminated_length": 1119.60009765625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.48937234308577143, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.800728252601395, + "kl": 0.02520751953125, + "learning_rate": 6.4493020554872e-07, + "loss": -0.019, + "num_tokens": 88328180.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0101380348205566, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11522291984628658, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19607907596329704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 984.0625, + "completions/mean_terminated_length": 984.0625, + "completions/min_length": 725.0, + "completions/min_terminated_length": 725.0, + "epoch": 0.48962240560140036, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.333500907286191, + "kl": 0.020843505859375, + "learning_rate": 6.445462011658228e-07, + "loss": -0.0253, + "num_tokens": 88365101.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0423897504806519, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003907678779723119, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07811303519387768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1357.3125, + "completions/mean_terminated_length": 1214.625, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.4898724681170293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2298773758463932, + "kl": 0.021514892578125, + "learning_rate": 6.441621247418635e-07, + "loss": 0.002, + "num_tokens": 88421618.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0491529703140259, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039179455323641636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062422537520098006, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1185.5, + "completions/mean_terminated_length": 1185.5, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.49012253063265815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.499898570430454, + "kl": 0.01953125, + "learning_rate": 6.437779765694951e-07, + "loss": 0.0528, + "num_tokens": 88460714.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.883230447769165, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1151.75, + "completions/mean_terminated_length": 1151.75, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.4903725931482871, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.427311571718343, + "kl": 0.00836181640625, + "learning_rate": 6.433937569414263e-07, + "loss": 0.0234, + "num_tokens": 88508302.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.721205472946167, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1574826392567687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1779366657242516, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 997.4375, + "completions/mean_terminated_length": 997.4375, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.490622655663916, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.988686050482083, + "kl": 0.018402099609375, + "learning_rate": 6.430094661504191e-07, + "loss": 0.0352, + "num_tokens": 88542061.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9128826260566711, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018817981045038498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025263357986308323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1147.8125, + "completions/mean_terminated_length": 1124.3333740234375, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.49087271817954486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1844121720800658, + "kl": 0.020843505859375, + "learning_rate": 6.426251044892907e-07, + "loss": -0.0637, + "num_tokens": 88593882.0, + "reward": 0.0, + "reward_std": 1.0239286422729492, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0397521345161931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08826841104970509, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1073.125, + "completions/mean_terminated_length": 1073.125, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.4911227806951738, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2098686374777383, + "kl": 0.0146942138671875, + "learning_rate": 6.422406722509119e-07, + "loss": -0.0385, + "num_tokens": 88646308.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8463404178619385, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025285580682964586, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14125788315755314, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1105.8125, + "completions/mean_terminated_length": 1049.5, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.4913728432108027, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.018596459157704, + "kl": 0.018524169921875, + "learning_rate": 6.418561697282071e-07, + "loss": 0.0219, + "num_tokens": 88699625.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9745585918426514, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09442703521782433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12295550922317942, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1105.75, + "completions/mean_terminated_length": 1105.75, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.4916229057264316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.789702168429756, + "kl": 0.0211181640625, + "learning_rate": 6.414715972141546e-07, + "loss": -0.0241, + "num_tokens": 88735013.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0262424945831299, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01243507202433039, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0902598845985905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1201.125, + "completions/mean_terminated_length": 1101.5, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.4918729682420605, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0379877893789216, + "kl": 0.0190887451171875, + "learning_rate": 6.410869550017856e-07, + "loss": 0.0089, + "num_tokens": 88777207.0, + "reward": 0.0, + "reward_std": 0.8276607394218445, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013583693984331834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17161867036116746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1151.6875, + "completions/mean_terminated_length": 1151.6875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.49212303075768943, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.819206382059575, + "kl": 0.01352691650390625, + "learning_rate": 6.407022433841851e-07, + "loss": -0.0129, + "num_tokens": 88824090.0, + "reward": 0.0, + "reward_std": 0.41672950983047485, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0792943919927374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050434509601256367, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16278820596099708, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1012.9375, + "completions/mean_terminated_length": 900.5385131835938, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.49237309327331835, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6663890759220004, + "kl": 0.017181396484375, + "learning_rate": 6.403174626544901e-07, + "loss": -0.0296, + "num_tokens": 88864665.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8614703416824341, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0012939896921566915, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04096351662576354, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1118.25, + "completions/mean_terminated_length": 1092.800048828125, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.4926231557889472, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9931617505220323, + "kl": 0.017974853515625, + "learning_rate": 6.399326131058913e-07, + "loss": -0.0083, + "num_tokens": 88916845.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9749912023544312, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0699345982798651, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09681086164464753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445935, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1146.125, + "completions/mean_terminated_length": 1146.125, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.49287321830457614, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.472458796635427, + "kl": 0.0188751220703125, + "learning_rate": 6.395476950316306e-07, + "loss": 0.0246, + "num_tokens": 88965663.0, + "reward": 0.0, + "reward_std": 0.795340895652771, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01942752771074797, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039642782185062765, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13655822255780922, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1131.25, + "completions/mean_terminated_length": 1046.1539306640625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.49312328082020507, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6395235065538203, + "kl": 0.01611328125, + "learning_rate": 6.391627087250032e-07, + "loss": -0.0285, + "num_tokens": 89008507.0, + "reward": 0.0, + "reward_std": 0.8876531720161438, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04554518309032913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07796852486294044, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1003.0, + "completions/max_terminated_length": 1003.0, + "completions/mean_length": 816.0, + "completions/mean_terminated_length": 816.0, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.49337334333583394, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.2214963905046075, + "kl": 0.023712158203125, + "learning_rate": 6.387776544793559e-07, + "loss": -0.0148, + "num_tokens": 89057939.0, + "reward": 0.0, + "reward_std": 0.5716917514801025, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09459401805964943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12678539021372992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1012.625, + "completions/mean_terminated_length": 980.1333618164062, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.49362340585146286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.140963358559289, + "kl": 0.0220489501953125, + "learning_rate": 6.383925325880869e-07, + "loss": -0.0063, + "num_tokens": 89110733.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0621397495269775, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04197337545821897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06516956183852074, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1093.125, + "completions/mean_terminated_length": 957.5, + "completions/min_length": 619.0, + "completions/min_terminated_length": 619.0, + "epoch": 0.4938734683670918, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8309061006494476, + "kl": 0.0149078369140625, + "learning_rate": 6.380073433446467e-07, + "loss": 0.0521, + "num_tokens": 89153519.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9578405022621155, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13408784834153745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1604717713933321, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1101.0, + "completions/mean_terminated_length": 1074.4000244140625, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.4941235308827207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.400546630842098, + "kl": 0.020721435546875, + "learning_rate": 6.376220870425363e-07, + "loss": 0.0151, + "num_tokens": 89207295.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9288556575775146, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2767935297326886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3500141597811039, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1114.0, + "completions/max_terminated_length": 1114.0, + "completions/mean_length": 802.5, + "completions/mean_terminated_length": 802.5, + "completions/min_length": 499.0, + "completions/min_terminated_length": 499.0, + "epoch": 0.4943735933983496, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7651945735494055, + "kl": 0.019561767578125, + "learning_rate": 6.372367639753085e-07, + "loss": 0.0126, + "num_tokens": 89236399.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6975257992744446, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15892030193461332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.096728483195616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1117.0, + "completions/max_terminated_length": 1117.0, + "completions/mean_length": 913.8125, + "completions/mean_terminated_length": 913.8125, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.4946236559139785, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1128792817246653, + "kl": 0.02276611328125, + "learning_rate": 6.368513744365665e-07, + "loss": 0.0079, + "num_tokens": 89274508.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.025817632675171, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020094532674919282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052059519505662094, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1147.875, + "completions/mean_terminated_length": 1097.571533203125, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.4948737184296074, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2157890766596924, + "kl": 0.0167236328125, + "learning_rate": 6.364659187199643e-07, + "loss": 0.0019, + "num_tokens": 89314978.0, + "reward": 0.0, + "reward_std": 0.5906205177307129, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1250284587544152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2689629408728659, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 968.6875, + "completions/mean_terminated_length": 968.6875, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.4951237809452363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.490043835675671, + "kl": 0.021514892578125, + "learning_rate": 6.360803971192066e-07, + "loss": -0.065, + "num_tokens": 89360181.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7411574125289917, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01969753531043711, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09948495449168177, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1289.25, + "completions/mean_terminated_length": 1193.45458984375, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.4953738434608652, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9910192858655735, + "kl": 0.021270751953125, + "learning_rate": 6.356948099280476e-07, + "loss": -0.0207, + "num_tokens": 89410433.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9926652908325195, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022985784643964682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08826161446807106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.067631901304592, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1035.8125, + "completions/mean_terminated_length": 1035.8125, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.49562390597649414, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.718543249686524, + "kl": 0.03125, + "learning_rate": 6.353091574402923e-07, + "loss": 0.0316, + "num_tokens": 89459486.0, + "reward": 0.0, + "reward_std": 0.7993665933609009, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05455198390692433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053717336345637516, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1233.1875, + "completions/mean_terminated_length": 1195.071533203125, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.495873968492123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5609500415923248, + "kl": 0.021331787109375, + "learning_rate": 6.349234399497949e-07, + "loss": -0.0308, + "num_tokens": 89500921.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7505053877830505, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02584184603577581, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19139188189358425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1109.375, + "completions/mean_terminated_length": 1083.3333740234375, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.49612403100775193, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4480510981106733, + "kl": 0.019775390625, + "learning_rate": 6.345376577504596e-07, + "loss": -0.0663, + "num_tokens": 89552647.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5413711071014404, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09097300658431397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08146340873464415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05561108336107647, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1292.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1056.625, + "completions/mean_terminated_length": 1056.625, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.49637409352338085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.188081411830809, + "kl": 0.011383056640625, + "learning_rate": 6.34151811136239e-07, + "loss": -0.0009, + "num_tokens": 89589081.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8867664337158203, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027983026044141456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08758949043675292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1351.4375, + "completions/mean_terminated_length": 1301.916748046875, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.4966241560390098, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.694001886678259, + "kl": 0.01275634765625, + "learning_rate": 6.337659004011359e-07, + "loss": -0.0171, + "num_tokens": 89642856.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9238661527633667, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09037065611080064, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10926621571577529, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1004.4375, + "completions/mean_terminated_length": 1004.4375, + "completions/min_length": 562.0, + "completions/min_terminated_length": 562.0, + "epoch": 0.49687421855463865, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0757494920160293, + "kl": 0.02105712890625, + "learning_rate": 6.333799258392015e-07, + "loss": -0.043, + "num_tokens": 89689463.0, + "reward": 0.0, + "reward_std": 0.8907145261764526, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.044832088828799396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07415139223443394, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1328.875, + "completions/mean_terminated_length": 1195.77783203125, + "completions/min_length": 593.0, + "completions/min_terminated_length": 593.0, + "epoch": 0.49712428107026757, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.56348403463232, + "kl": 0.015411376953125, + "learning_rate": 6.329938877445353e-07, + "loss": 0.0426, + "num_tokens": 89748341.0, + "reward": 0.0, + "reward_std": 0.5141884088516235, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03487654173716429, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06065875148600354, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1213.5625, + "completions/mean_terminated_length": 1118.0833740234375, + "completions/min_length": 540.0, + "completions/min_terminated_length": 540.0, + "epoch": 0.4973743435858965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3172620323927737, + "kl": 0.0172119140625, + "learning_rate": 6.326077864112853e-07, + "loss": -0.023, + "num_tokens": 89800630.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0057127475738525, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04288935400521822, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09436464353452462, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 966.125, + "completions/mean_terminated_length": 966.125, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.49762440610152536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.176452904426223, + "kl": 0.0164794921875, + "learning_rate": 6.322216221336485e-07, + "loss": -0.0428, + "num_tokens": 89838752.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8840538263320923, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17239540228105205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1713218008764966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1070.625, + "completions/mean_terminated_length": 1070.625, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.4978744686171543, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.796455023922156, + "kl": 0.0181884765625, + "learning_rate": 6.318353952058688e-07, + "loss": -0.0432, + "num_tokens": 89884090.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5388092994689941, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035812961038516763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24136119624947622, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1312.0625, + "completions/mean_terminated_length": 1299.533447265625, + "completions/min_length": 1109.0, + "completions/min_terminated_length": 1109.0, + "epoch": 0.4981245311327832, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.012895072516287, + "kl": 0.0141448974609375, + "learning_rate": 6.314491059222383e-07, + "loss": 0.0042, + "num_tokens": 89928003.0, + "reward": 0.0, + "reward_std": 0.9095876216888428, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12409802949717005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20714098196938985, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1152.1875, + "completions/mean_terminated_length": 1152.1875, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.4983745936484121, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1158804378282476, + "kl": 0.0147247314453125, + "learning_rate": 6.310627545770969e-07, + "loss": -0.0357, + "num_tokens": 89966662.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9556637406349182, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06302023738557516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06807922845530738, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1041.9375, + "completions/mean_terminated_length": 936.2307739257812, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.498624656164041, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.484995084409857, + "kl": 0.0146331787109375, + "learning_rate": 6.30676341464831e-07, + "loss": -0.0038, + "num_tokens": 90001797.0, + "reward": 0.0, + "reward_std": 0.6389721632003784, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02242430231812042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03160637048612339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195013, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1258.25, + "completions/mean_terminated_length": 1070.2222900390625, + "completions/min_length": 720.0, + "completions/min_terminated_length": 720.0, + "epoch": 0.4988747186796699, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4486044452681055, + "kl": 0.01202392578125, + "learning_rate": 6.30289866879875e-07, + "loss": -0.0236, + "num_tokens": 90056417.0, + "reward": 0.0, + "reward_std": 0.995840847492218, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18884426163282922, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19693224713730292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1024.625, + "completions/mean_terminated_length": 1024.625, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.49912478119529885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3172626967963317, + "kl": 0.0155487060546875, + "learning_rate": 6.299033311167095e-07, + "loss": -0.0037, + "num_tokens": 90088763.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9763874411582947, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22141635670268933, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21470234508384436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 1996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1037.0, + "completions/mean_terminated_length": 1037.0, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.4993748437109277, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4546748182525464, + "kl": 0.00963592529296875, + "learning_rate": 6.295167344698619e-07, + "loss": 0.0232, + "num_tokens": 90126203.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8272068500518799, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1173051688710578, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07533379694341773, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1094.5625, + "completions/mean_terminated_length": 1001.0000610351562, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.49962490622655664, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7406421115994637, + "kl": 0.0133514404296875, + "learning_rate": 6.291300772339059e-07, + "loss": 0.0018, + "num_tokens": 90176636.0, + "reward": 0.0, + "reward_std": 0.7947179675102234, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029006366629659247, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07612108913414994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1289.75, + "completions/mean_terminated_length": 1163.5999755859375, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.49987496874218557, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.019782461405825, + "kl": 0.0174102783203125, + "learning_rate": 6.287433597034616e-07, + "loss": -0.0039, + "num_tokens": 90232512.0, + "reward": 0.0, + "reward_std": 0.7440252304077148, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01822240567969707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07176264787848224, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 1999 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1125.1875, + "completions/mean_terminated_length": 1100.2000732421875, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.5001250312578145, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1007935104009308, + "kl": 0.01617431640625, + "learning_rate": 6.283565821731948e-07, + "loss": -0.068, + "num_tokens": 90282339.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0521517992019653, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03583762594149756, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0841440559797888, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2000 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1251.0, + "completions/max_terminated_length": 1251.0, + "completions/mean_length": 1038.5625, + "completions/mean_terminated_length": 1038.5625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.5003750937734434, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.360499303320881, + "kl": 0.0166778564453125, + "learning_rate": 6.279697449378172e-07, + "loss": -0.0502, + "num_tokens": 90330164.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.067063570022583, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07161267086747633, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06797488947807702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1211.25, + "completions/mean_terminated_length": 1038.0, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.5006251562890722, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8729838894750417, + "kl": 0.0169830322265625, + "learning_rate": 6.275828482920859e-07, + "loss": 0.0188, + "num_tokens": 90378992.0, + "reward": 0.0, + "reward_std": 0.6033382415771484, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02609545066137067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06983177485582398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1245.5625, + "completions/mean_terminated_length": 1047.6666259765625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.5008752188047012, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7529540235392314, + "kl": 0.0126495361328125, + "learning_rate": 6.271958925308034e-07, + "loss": -0.0906, + "num_tokens": 90425273.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9411312937736511, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09678001163484302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.035677456449345044, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590965, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1015.5, + "completions/mean_terminated_length": 1015.5, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.5011252813203301, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8550431917570154, + "kl": 0.020233154296875, + "learning_rate": 6.268088779488168e-07, + "loss": 0.0285, + "num_tokens": 90465817.0, + "reward": 0.0, + "reward_std": 0.772530734539032, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08585371193337303, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14053224687839366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2004 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1127.0, + "completions/max_terminated_length": 1127.0, + "completions/mean_length": 942.6875, + "completions/mean_terminated_length": 942.6875, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.5013753438359589, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.560872136671241, + "kl": 0.010530471801757812, + "learning_rate": 6.264218048410187e-07, + "loss": 0.0093, + "num_tokens": 90509564.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0345324277877808, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19168034167283785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09538061116215932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05708992257184506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2005 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1237.6875, + "completions/mean_terminated_length": 1220.2000732421875, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.5016254063515879, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5530987299477976, + "kl": 0.01141357421875, + "learning_rate": 6.260346735023457e-07, + "loss": 0.0157, + "num_tokens": 90553599.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.4735937714576721, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1545841496313129, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.253758390686713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2006 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1272.25, + "completions/mean_terminated_length": 1168.727294921875, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.5018754688672168, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.798756177656917, + "kl": 0.0134429931640625, + "learning_rate": 6.256474842277791e-07, + "loss": 0.0011, + "num_tokens": 90607347.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6490448713302612, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05753867780607999, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19988413047163633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1069.0, + "completions/mean_length": 1250.9375, + "completions/mean_terminated_length": 1001.875, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.5021255313828457, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9172426041412098, + "kl": 0.00670623779296875, + "learning_rate": 6.252602373123444e-07, + "loss": -0.0006, + "num_tokens": 90661218.0, + "reward": 0.0, + "reward_std": 0.24317243695259094, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08029047226776881, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10708461119098804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16233253479155635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2008 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1144.875, + "completions/mean_terminated_length": 1144.875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.5023755938984746, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7146959195060183, + "kl": 0.0112762451171875, + "learning_rate": 6.248729330511105e-07, + "loss": 0.0002, + "num_tokens": 90703768.0, + "reward": 0.0, + "reward_std": 0.613000750541687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027519436048529616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05144272694774026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2009 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1283.4375, + "completions/mean_terminated_length": 1211.25, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.5026256564141035, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.417908475988924, + "kl": 0.0093994140625, + "learning_rate": 6.244855717391908e-07, + "loss": -0.0142, + "num_tokens": 90760439.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0512363910675049, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00012953567080478505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05973682420443361, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2010 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1113.1875, + "completions/mean_terminated_length": 1087.4000244140625, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.5028757189297325, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.401726294355412, + "kl": 0.0184173583984375, + "learning_rate": 6.240981536717414e-07, + "loss": 0.0255, + "num_tokens": 90811218.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7767967581748962, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0383059605584631, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0773826741793105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05163977794943226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1066.875, + "completions/mean_terminated_length": 1038.0, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.5031257814453614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4254394264297927, + "kl": 0.0144500732421875, + "learning_rate": 6.237106791439624e-07, + "loss": 0.0053, + "num_tokens": 90839728.0, + "reward": 0.0, + "reward_std": 0.6925433874130249, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0945993011872293, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10981683014469941, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2012 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1179.875, + "completions/mean_terminated_length": 1106.0, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.5033758439609902, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1939106771426444, + "kl": 0.0157012939453125, + "learning_rate": 6.233231484510958e-07, + "loss": -0.0328, + "num_tokens": 90890446.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8915489912033081, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019862399106526832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13988750263057845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1049.875, + "completions/mean_terminated_length": 1049.875, + "completions/min_length": 638.0, + "completions/min_terminated_length": 638.0, + "epoch": 0.5036259064766192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0606146605968965, + "kl": 0.00997161865234375, + "learning_rate": 6.229355618884279e-07, + "loss": -0.0331, + "num_tokens": 90935284.0, + "reward": 0.0, + "reward_std": 0.5146667957305908, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08073281318267277, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19108248070105016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13662601021279466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2014 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1217.75, + "completions/mean_terminated_length": 1177.4285888671875, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.5038759689922481, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2774220657442843, + "kl": 0.0103302001953125, + "learning_rate": 6.225479197512863e-07, + "loss": 0.0229, + "num_tokens": 90978584.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.406349241733551, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20792493027969122, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07425130978606775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621153, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1142.0, + "completions/max_terminated_length": 1142.0, + "completions/mean_length": 956.5625, + "completions/mean_terminated_length": 956.5625, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.5041260315078769, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.444339138409584, + "kl": 0.0146331787109375, + "learning_rate": 6.221602223350415e-07, + "loss": 0.0068, + "num_tokens": 91026777.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0593595504760742, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10072906481916856, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14294913049637767, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1055.0, + "completions/max_terminated_length": 1055.0, + "completions/mean_length": 953.125, + "completions/mean_terminated_length": 953.125, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.5043760940235059, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.117273498842334, + "kl": 0.0154876708984375, + "learning_rate": 6.217724699351063e-07, + "loss": 0.0041, + "num_tokens": 91075747.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9719052314758301, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12503585694334915, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15757792264909848, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2017 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1031.1875, + "completions/mean_terminated_length": 999.9334106445312, + "completions/min_length": 500.0, + "completions/min_terminated_length": 500.0, + "epoch": 0.5046261565391348, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.541339175751418, + "kl": 0.019622802734375, + "learning_rate": 6.213846628469346e-07, + "loss": -0.0577, + "num_tokens": 91112566.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0188775062561035, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.048810025149485944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12138104836694648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1031898645611484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2018 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1095.875, + "completions/mean_terminated_length": 1068.933349609375, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.5048762190547637, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.71441214939748, + "kl": 0.0136871337890625, + "learning_rate": 6.209968013660231e-07, + "loss": 0.0264, + "num_tokens": 91156196.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9967584609985352, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04822918537219368, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0633238970676124, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2019 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1104.6875, + "completions/mean_terminated_length": 1078.3333740234375, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.5051262815703926, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4418383746201586, + "kl": 0.01666259765625, + "learning_rate": 6.206088857879091e-07, + "loss": -0.0095, + "num_tokens": 91206479.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7908489108085632, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05410411713446016, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02547369803211522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2020 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 948.9375, + "completions/mean_terminated_length": 912.2000732421875, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.5053763440860215, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6679847493834, + "kl": 0.0186767578125, + "learning_rate": 6.202209164081711e-07, + "loss": 0.0425, + "num_tokens": 91242926.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0688472986221313, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03987386598662628, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08421389933338491, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590968, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2021 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1138.625, + "completions/mean_terminated_length": 1055.2308349609375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.5056264066016504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2979035924954148, + "kl": 0.016998291015625, + "learning_rate": 6.198328935224294e-07, + "loss": -0.0179, + "num_tokens": 91297872.0, + "reward": 0.0, + "reward_std": 0.5715004801750183, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.048160233182146614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07571590238520495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15587269259333497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2022 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1033.8125, + "completions/mean_terminated_length": 1033.8125, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.5058764691172793, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3305546859294286, + "kl": 0.0177459716796875, + "learning_rate": 6.194448174263442e-07, + "loss": -0.0236, + "num_tokens": 91350437.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8846057653427124, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01890866126456233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21224879841788308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1214.3125, + "completions/mean_terminated_length": 1195.2667236328125, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.5061265316329082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.925885633870664, + "kl": 0.011871337890625, + "learning_rate": 6.190566884156167e-07, + "loss": -0.0288, + "num_tokens": 91401010.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.027402400970459, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02078383790174819, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08144644775637201, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13109227736669002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1160.4375, + "completions/mean_terminated_length": 1137.800048828125, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.5063765941485371, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8212782599596786, + "kl": 0.0108184814453125, + "learning_rate": 6.18668506785988e-07, + "loss": 0.0026, + "num_tokens": 91446033.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6183702349662781, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05418435090350162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06484039301593657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1028.0, + "completions/max_terminated_length": 1028.0, + "completions/mean_length": 807.25, + "completions/mean_terminated_length": 807.25, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.5066266566641661, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4190388556974383, + "kl": 0.015838623046875, + "learning_rate": 6.182802728332404e-07, + "loss": -0.003, + "num_tokens": 91472789.0, + "reward": 0.0, + "reward_std": 1.0261061191558838, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06922273186662314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14393415836528076, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2026 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1297.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 899.125, + "completions/mean_terminated_length": 899.125, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.5068767191797949, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6354174042336966, + "kl": 0.008636474609375, + "learning_rate": 6.178919868531943e-07, + "loss": 0.0659, + "num_tokens": 91514511.0, + "reward": 0.0, + "reward_std": 0.9473845958709717, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015642058880807882, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02494670331574122, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2027 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1270.375, + "completions/mean_terminated_length": 1166.0, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.5071267816954238, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8771826812021946, + "kl": 0.0162811279296875, + "learning_rate": 6.175036491417113e-07, + "loss": -0.0205, + "num_tokens": 91573893.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.417508602142334, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05189613345535482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0652237266022054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1190.75, + "completions/mean_terminated_length": 1146.571533203125, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.5073768442110528, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3929901134814573, + "kl": 0.01043701171875, + "learning_rate": 6.171152599946916e-07, + "loss": 0.0162, + "num_tokens": 91621753.0, + "reward": 0.0, + "reward_std": 0.4586096704006195, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038374594676695656, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12675371559410448, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2029 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1080.6875, + "completions/mean_terminated_length": 1080.6875, + "completions/min_length": 805.0, + "completions/min_terminated_length": 805.0, + "epoch": 0.5076269067266816, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1350652110904624, + "kl": 0.017425537109375, + "learning_rate": 6.167268197080747e-07, + "loss": -0.0419, + "num_tokens": 91667436.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.946037769317627, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0018390711106002783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05492782656610846, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2030 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1089.8125, + "completions/mean_terminated_length": 1062.4666748046875, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.5078769692423106, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.487482675176948, + "kl": 0.019134521484375, + "learning_rate": 6.163383285778397e-07, + "loss": -0.0053, + "num_tokens": 91712113.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0436747074127197, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019313205974243332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0597900528372524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2031 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1211.6875, + "completions/mean_terminated_length": 1211.6875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.5081270317579395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.12354751597957, + "kl": 0.01397705078125, + "learning_rate": 6.159497869000035e-07, + "loss": 0.0106, + "num_tokens": 91763204.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0369948148727417, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08472627145295117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042780329743824515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1121.3125, + "completions/mean_terminated_length": 1121.3125, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.5083770942735684, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7466210628484053, + "kl": 0.021209716796875, + "learning_rate": 6.155611949706221e-07, + "loss": 0.0175, + "num_tokens": 91815761.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.01591157913208, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15927824378462754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10647957768096025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2033 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 861.0, + "completions/max_terminated_length": 861.0, + "completions/mean_length": 647.75, + "completions/mean_terminated_length": 647.75, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.5086271567891973, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6329861054341976, + "kl": 0.014678955078125, + "learning_rate": 6.151725530857893e-07, + "loss": 0.0296, + "num_tokens": 91842821.0, + "reward": 0.0, + "reward_std": 0.7133623957633972, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1414179237949076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07398512712349821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2034 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1121.9375, + "completions/mean_terminated_length": 1067.9285888671875, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.5088772193048262, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5144891393227926, + "kl": 0.0097808837890625, + "learning_rate": 6.147838615416378e-07, + "loss": -0.0312, + "num_tokens": 91894228.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9684914350509644, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13790320266069173, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0593280963292026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1261.0625, + "completions/mean_terminated_length": 1022.125, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.5091272818204551, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7685917143591783, + "kl": 0.0153045654296875, + "learning_rate": 6.143951206343374e-07, + "loss": -0.0036, + "num_tokens": 91949453.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5696409940719604, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04918606822526038, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18313990088195192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2036 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1146.4375, + "completions/mean_terminated_length": 1064.84619140625, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.5093773443360841, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6834497042324905, + "kl": 0.0152740478515625, + "learning_rate": 6.140063306600958e-07, + "loss": 0.0394, + "num_tokens": 92000476.0, + "reward": 0.0, + "reward_std": 1.0063549280166626, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14265307335741836, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11665363213111492, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2037 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1450.375, + "completions/mean_terminated_length": 1367.666748046875, + "completions/min_length": 1155.0, + "completions/min_terminated_length": 1155.0, + "epoch": 0.5096274068517129, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4391025751858213, + "kl": 0.010498046875, + "learning_rate": 6.136174919151578e-07, + "loss": -0.0162, + "num_tokens": 92047666.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7270627021789551, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04583230558204282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07100481446035244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2038 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1331.6875, + "completions/mean_terminated_length": 1292.84619140625, + "completions/min_length": 1123.0, + "completions/min_terminated_length": 1123.0, + "epoch": 0.5098774693673418, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7943497514736864, + "kl": 0.011474609375, + "learning_rate": 6.132286046958057e-07, + "loss": -0.0032, + "num_tokens": 92084541.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8529057502746582, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03850733070857289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14609266922317166, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1241.5, + "completions/mean_terminated_length": 1124.0, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.5101275318829708, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9219351658750923, + "kl": 0.010528564453125, + "learning_rate": 6.12839669298359e-07, + "loss": 0.0068, + "num_tokens": 92130341.0, + "reward": 0.0, + "reward_std": 0.8746392726898193, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00849702954689546, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03571071460790342, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2040 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1407.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1185.875, + "completions/mean_terminated_length": 1185.875, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.5103775943985996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.401066596858409, + "kl": 0.0171051025390625, + "learning_rate": 6.124506860191731e-07, + "loss": -0.0394, + "num_tokens": 92182675.0, + "reward": 0.0, + "reward_std": 0.8542007803916931, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008970767803729332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05407762508629519, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2041 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1118.3125, + "completions/mean_terminated_length": 1092.86669921875, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.5106276569142285, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4082459292193104, + "kl": 0.017913818359375, + "learning_rate": 6.120616551546405e-07, + "loss": 0.0213, + "num_tokens": 92234912.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7643733620643616, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03678445570566448, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1725118125978391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2042 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1166.0, + "completions/max_terminated_length": 1166.0, + "completions/mean_length": 984.3125, + "completions/mean_terminated_length": 984.3125, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.5108777194298575, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6777040220172887, + "kl": 0.0153656005859375, + "learning_rate": 6.116725770011899e-07, + "loss": -0.0154, + "num_tokens": 92277701.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6911045908927917, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03417319441790371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043738772898649844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04533823502911818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2043 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1263.1875, + "completions/mean_terminated_length": 1121.0999755859375, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.5111277819454864, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7908079953556895, + "kl": 0.009124755859375, + "learning_rate": 6.112834518552858e-07, + "loss": 0.0095, + "num_tokens": 92316416.0, + "reward": 0.0, + "reward_std": 0.45772814750671387, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13800190316606983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15279802201000167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2044 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1179.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 925.0, + "completions/mean_terminated_length": 925.0, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.5113778444611152, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.036781451567229, + "kl": 0.00672149658203125, + "learning_rate": 6.108942800134286e-07, + "loss": -0.002, + "num_tokens": 92353616.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9935612678527832, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03174157643964931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06793708552809528, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2045 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1279.375, + "completions/mean_terminated_length": 1205.8333740234375, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.5116279069767442, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3849368091649135, + "kl": 0.019927978515625, + "learning_rate": 6.105050617721542e-07, + "loss": 0.0255, + "num_tokens": 92407622.0, + "reward": 0.0, + "reward_std": 0.5300437211990356, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.119294059758883, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19798023549408963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2046 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1019.5, + "completions/mean_terminated_length": 987.4667358398438, + "completions/min_length": 532.0, + "completions/min_terminated_length": 532.0, + "epoch": 0.5118779694923731, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.058241849445832, + "kl": 0.0141448974609375, + "learning_rate": 6.101157974280342e-07, + "loss": 0.0155, + "num_tokens": 92436654.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5009094476699829, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04735830194480971, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0566328297750236, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1567612007930345, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2047 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1101.5625, + "completions/mean_terminated_length": 1075.0, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.512128032008002, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9377713195165853, + "kl": 0.012054443359375, + "learning_rate": 6.097264872776749e-07, + "loss": 0.0214, + "num_tokens": 92475431.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.042947769165039, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05183774617910206, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0848090786317312, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2048 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1419.0625, + "completions/mean_terminated_length": 1338.125, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.5123780945236309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.001039599753915, + "kl": 0.01409912109375, + "learning_rate": 6.093371316177177e-07, + "loss": -0.0076, + "num_tokens": 92531872.0, + "reward": 0.0, + "reward_std": 0.9984475374221802, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07333075368441207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08617596934055678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2049 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1202.25, + "completions/mean_terminated_length": 1023.6000366210938, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.5126281570392598, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1956436872403935, + "kl": 0.0164642333984375, + "learning_rate": 6.08947730744839e-07, + "loss": -0.0496, + "num_tokens": 92573820.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6259706616401672, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04648600418945268, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08655666193697077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2050 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 858.25, + "completions/mean_terminated_length": 858.25, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.5128782195548888, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.576959567644209, + "kl": 0.015869140625, + "learning_rate": 6.085582849557487e-07, + "loss": -0.1029, + "num_tokens": 92613104.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9593731164932251, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09327402526471995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1221016424813519, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1076.5625, + "completions/mean_terminated_length": 1048.3333740234375, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.5131282820705176, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7676694878885315, + "kl": 0.020416259765625, + "learning_rate": 6.081687945471923e-07, + "loss": 0.0288, + "num_tokens": 92658305.0, + "reward": 0.0, + "reward_std": 0.8476142883300781, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03401789799743071, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06764689905105666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 997.125, + "completions/mean_terminated_length": 997.125, + "completions/min_length": 499.0, + "completions/min_terminated_length": 499.0, + "epoch": 0.5133783445861465, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5784633404434834, + "kl": 0.014801025390625, + "learning_rate": 6.077792598159479e-07, + "loss": -0.027, + "num_tokens": 92696683.0, + "reward": 0.0, + "reward_std": 1.0079814195632935, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09064564687988802, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12134783816441712, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1294.0, + "completions/max_terminated_length": 1294.0, + "completions/mean_length": 991.8125, + "completions/mean_terminated_length": 991.8125, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.5136284071017755, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.364334770295585, + "kl": 0.0091705322265625, + "learning_rate": 6.073896810588284e-07, + "loss": -0.0323, + "num_tokens": 92751120.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7185707092285156, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02879752699057101, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12106667609492272, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1152.875, + "completions/mean_terminated_length": 1152.875, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.5138784696174044, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3644807360015787, + "kl": 0.0161895751953125, + "learning_rate": 6.070000585726796e-07, + "loss": -0.0203, + "num_tokens": 92808926.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0539655685424805, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00771520292777135, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03721846843450934, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2055 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1019.5625, + "completions/mean_terminated_length": 987.5333862304688, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.5141285321330332, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.185402238186404, + "kl": 0.0137786865234375, + "learning_rate": 6.066103926543811e-07, + "loss": -0.0479, + "num_tokens": 92860207.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0594370365142822, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13258286500484434, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04314545562245761, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1137.0, + "completions/max_terminated_length": 1137.0, + "completions/mean_length": 914.5625, + "completions/mean_terminated_length": 914.5625, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.5143785946486622, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3289076611424444, + "kl": 0.00768280029296875, + "learning_rate": 6.062206836008452e-07, + "loss": 0.0207, + "num_tokens": 92897456.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.990839958190918, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05719494586031876, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18096203934065766, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2057 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1161.5625, + "completions/mean_terminated_length": 1161.5625, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.5146286571642911, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7329328570128633, + "kl": 0.01458740234375, + "learning_rate": 6.058309317090175e-07, + "loss": -0.0285, + "num_tokens": 92936089.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.591127872467041, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01676016138408305, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08895069691075452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1148.0625, + "completions/mean_terminated_length": 1124.60009765625, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.5148787196799199, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.986078353203957, + "kl": 0.0140380859375, + "learning_rate": 6.054411372758757e-07, + "loss": -0.0393, + "num_tokens": 92995946.0, + "reward": 0.0, + "reward_std": 0.691930890083313, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0177200994800695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07534752096041411, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1125.0, + "completions/mean_terminated_length": 1125.0, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.5151287821955489, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4001024258729573, + "kl": 0.018585205078125, + "learning_rate": 6.050513005984301e-07, + "loss": -0.0357, + "num_tokens": 93038298.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0517290830612183, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03301396459288604, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060197875968623644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2060 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1143.0, + "completions/mean_terminated_length": 1119.2000732421875, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.5153788447111778, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3471161535641087, + "kl": 0.0132598876953125, + "learning_rate": 6.046614219737239e-07, + "loss": 0.0091, + "num_tokens": 93083922.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9486764669418335, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022580579000345054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02800170519872547, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2061 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1141.4375, + "completions/mean_terminated_length": 862.5555419921875, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.5156289072268067, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9797061581979585, + "kl": 0.0140228271484375, + "learning_rate": 6.04271501698831e-07, + "loss": -0.006, + "num_tokens": 93131545.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7129707336425781, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04839382705870491, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11479087508925065, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2062 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1304.375, + "completions/mean_terminated_length": 1215.45458984375, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.5158789697424356, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.633250819729388, + "kl": 0.0114898681640625, + "learning_rate": 6.03881540070858e-07, + "loss": -0.0552, + "num_tokens": 93184167.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9606443643569946, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03268518005954426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06454154690099388, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13977495139343474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2063 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1209.0, + "completions/max_terminated_length": 1209.0, + "completions/mean_length": 1036.5625, + "completions/mean_terminated_length": 1036.5625, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.5161290322580645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3350058538002503, + "kl": 0.01336669921875, + "learning_rate": 6.034915373869427e-07, + "loss": -0.0382, + "num_tokens": 93222232.0, + "reward": 0.0, + "reward_std": 0.7984811067581177, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07799198713442378, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0482814392581376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2064 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1223.8125, + "completions/mean_terminated_length": 1184.357177734375, + "completions/min_length": 1017.0, + "completions/min_terminated_length": 1017.0, + "epoch": 0.5163790947736934, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.563009267117382, + "kl": 0.01190185546875, + "learning_rate": 6.03101493944254e-07, + "loss": -0.0531, + "num_tokens": 93275917.0, + "reward": 0.0, + "reward_std": 0.9507585763931274, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2690458237413152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3185179941450789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2065 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1151.1875, + "completions/mean_terminated_length": 1127.933349609375, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.5166291572893223, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.221301649261984, + "kl": 0.01995849609375, + "learning_rate": 6.027114100399924e-07, + "loss": -0.0404, + "num_tokens": 93322192.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9610854387283325, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14895085750067505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15377481145841618, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505424, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2066 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1243.8125, + "completions/mean_terminated_length": 1207.21435546875, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.5168792198049512, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1992644284746024, + "kl": 0.0157928466796875, + "learning_rate": 6.023212859713885e-07, + "loss": -0.012, + "num_tokens": 93366981.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9358853101730347, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023912927467423295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03974695171144502, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2067 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1052.4375, + "completions/mean_terminated_length": 1022.6000366210938, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.5171292823205802, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.38580244729694, + "kl": 0.0170440673828125, + "learning_rate": 6.019311220357039e-07, + "loss": -0.0202, + "num_tokens": 93422044.0, + "reward": 0.0, + "reward_std": 1.0354093313217163, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04263739483493112, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15288916782960485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2068 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1335.625, + "completions/mean_terminated_length": 1237.0, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.5173793448362091, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9316175292568336, + "kl": 0.016448974609375, + "learning_rate": 6.015409185302306e-07, + "loss": -0.0215, + "num_tokens": 93481270.0, + "reward": 0.0, + "reward_std": 0.6443211436271667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13081278330021626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11158107407248617, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2069 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1261.625, + "completions/mean_terminated_length": 1023.25, + "completions/min_length": 427.0, + "completions/min_terminated_length": 427.0, + "epoch": 0.5176294073518379, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6664837655662765, + "kl": 0.01373291015625, + "learning_rate": 6.011506757522908e-07, + "loss": 0.0982, + "num_tokens": 93539904.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.8664066195487976, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0368642261887587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08419283896020646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333333, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2070 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1124.9375, + "completions/mean_terminated_length": 1124.9375, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.5178794698674669, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.665320093836123, + "kl": 0.0143280029296875, + "learning_rate": 6.007603939992367e-07, + "loss": -0.0191, + "num_tokens": 93584495.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9795430302619934, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08513645200799008, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12157549106440943, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1232.5, + "completions/mean_terminated_length": 1170.769287109375, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.5181295323830958, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3591331712527395, + "kl": 0.0154876708984375, + "learning_rate": 6.003700735684495e-07, + "loss": 0.024, + "num_tokens": 93635991.0, + "reward": 0.0, + "reward_std": 0.7187902927398682, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04412604847787831, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06966673069121226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2072 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 1018.625, + "completions/mean_terminated_length": 907.5385131835938, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.5183795948987246, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8560169487951006, + "kl": 0.015167236328125, + "learning_rate": 5.999797147573409e-07, + "loss": -0.049, + "num_tokens": 93664265.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8842338919639587, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0381989673147721, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09782914279674627, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2073 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1078.9375, + "completions/mean_terminated_length": 1078.9375, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.5186296574143536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5551904137767973, + "kl": 0.020050048828125, + "learning_rate": 5.99589317863351e-07, + "loss": 0.0025, + "num_tokens": 93711440.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9449819326400757, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01940719991768608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12200780450683649, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147857, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2074 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1353.375, + "completions/mean_terminated_length": 1265.4000244140625, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.5188797199299825, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.543509646109939, + "kl": 0.018035888671875, + "learning_rate": 5.991988831839493e-07, + "loss": 0.0412, + "num_tokens": 93763774.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7881155610084534, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004418250578639125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.013378766730364089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1064.8125, + "completions/mean_terminated_length": 1064.8125, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.5191297824456114, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.769112734560471, + "kl": 0.019317626953125, + "learning_rate": 5.988084110166343e-07, + "loss": -0.0454, + "num_tokens": 93800635.0, + "reward": 0.0, + "reward_std": 0.885807991027832, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03555504852683278, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07850707535526831, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2076 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1185.0, + "completions/mean_length": 998.5, + "completions/mean_terminated_length": 926.857177734375, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.5193798449612403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4549141663490435, + "kl": 0.0159759521484375, + "learning_rate": 5.984179016589324e-07, + "loss": -0.0417, + "num_tokens": 93841019.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9344780445098877, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027960026041639822, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1792996784743703, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857664, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2077 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1055.25, + "completions/mean_terminated_length": 1025.60009765625, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.5196299074768692, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7979555425673968, + "kl": 0.020172119140625, + "learning_rate": 5.980273554083992e-07, + "loss": 0.0094, + "num_tokens": 93881951.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8794860243797302, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.4776790489250653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.5270806405178572, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2078 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 904.5, + "completions/mean_terminated_length": 864.800048828125, + "completions/min_length": 517.0, + "completions/min_terminated_length": 517.0, + "epoch": 0.5198799699924981, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.412295805476178, + "kl": 0.01971435546875, + "learning_rate": 5.976367725626177e-07, + "loss": -0.0851, + "num_tokens": 93911327.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0530644655227661, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003610538959438657, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09004695658647657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 893.5625, + "completions/mean_terminated_length": 853.1333618164062, + "completions/min_length": 478.0, + "completions/min_terminated_length": 478.0, + "epoch": 0.5201300325081271, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2775841597331197, + "kl": 0.0159149169921875, + "learning_rate": 5.972461534191991e-07, + "loss": 0.0146, + "num_tokens": 93941656.0, + "reward": 0.0, + "reward_std": 0.5676044225692749, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016374388513770856, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08465289236330047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14950535726806533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2080 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1225.5, + "completions/mean_terminated_length": 1060.800048828125, + "completions/min_length": 279.0, + "completions/min_terminated_length": 279.0, + "epoch": 0.5203800950237559, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.193079077382661, + "kl": 0.0145263671875, + "learning_rate": 5.968554982757821e-07, + "loss": -0.103, + "num_tokens": 94003288.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0647320747375488, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -2.4689161530140223e-05, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0765237318200881, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1431.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 830.8125, + "completions/mean_terminated_length": 830.8125, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.5206301575393848, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.374346817625647, + "kl": 0.0165252685546875, + "learning_rate": 5.964648074300332e-07, + "loss": -0.0629, + "num_tokens": 94040389.0, + "reward": 1.862645149230957e-09, + "reward_std": 1.0678906440734863, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.062058241300577285, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11958417112140968, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829063, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2082 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1077.25, + "completions/mean_terminated_length": 1077.25, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.5208802200550138, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0925607704997873, + "kl": 0.010406494140625, + "learning_rate": 5.960740811796454e-07, + "loss": -0.0159, + "num_tokens": 94090945.0, + "reward": 0.0, + "reward_std": 0.5227272510528564, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005079949680148653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01101964541679368, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2083 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1339.3125, + "completions/mean_terminated_length": 1214.3333740234375, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.5211302825706426, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9959828192246714, + "kl": 0.0095062255859375, + "learning_rate": 5.956833198223395e-07, + "loss": -0.0206, + "num_tokens": 94146366.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.009868860244751, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07489141981619657, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1107939723039166, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1164.8125, + "completions/mean_terminated_length": 1164.8125, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.5213803450862715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1563826661503094, + "kl": 0.0163421630859375, + "learning_rate": 5.952925236558626e-07, + "loss": -0.0141, + "num_tokens": 94181827.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8861754536628723, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1338604904079167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0941977924998814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2085 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1104.125, + "completions/mean_terminated_length": 1104.125, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.5216304076019005, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.009395488078203, + "kl": 0.0166015625, + "learning_rate": 5.949016929779882e-07, + "loss": -0.0101, + "num_tokens": 94227485.0, + "reward": 0.0, + "reward_std": 0.8716477155685425, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06587638498762566, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08597238352423195, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12758439472669758, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1114.0, + "completions/mean_length": 1181.875, + "completions/mean_terminated_length": 863.75, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.5218804701175294, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9935814017639295, + "kl": 0.01373291015625, + "learning_rate": 5.945108280865165e-07, + "loss": -0.0244, + "num_tokens": 94273755.0, + "reward": 0.0, + "reward_std": 0.8782919645309448, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0038947037568267742, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.012616119913759912, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746353, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2087 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1105.8125, + "completions/mean_terminated_length": 1105.8125, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.5221305326331583, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.862600066845566, + "kl": 0.0118865966796875, + "learning_rate": 5.941199292792735e-07, + "loss": -0.0181, + "num_tokens": 94328464.0, + "reward": 0.0, + "reward_std": 0.8924115896224976, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08604406631590884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18333518712172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2088 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1162.5625, + "completions/mean_terminated_length": 1162.5625, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.5223805951487872, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.867700489622576, + "kl": 0.012115478515625, + "learning_rate": 5.93728996854111e-07, + "loss": -0.0367, + "num_tokens": 94369369.0, + "reward": 0.0, + "reward_std": 0.892612099647522, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041058099744841994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09322728129763301, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2089 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1116.4375, + "completions/mean_terminated_length": 1090.86669921875, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.5226306576644161, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.409860098032638, + "kl": 0.0131378173828125, + "learning_rate": 5.933380311089064e-07, + "loss": 0.011, + "num_tokens": 94407880.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5199778079986572, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10102876623226012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12239651556257045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2090 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1098.875, + "completions/mean_terminated_length": 1098.875, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.522880720180045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7513175288865335, + "kl": 0.01531982421875, + "learning_rate": 5.929470323415631e-07, + "loss": 0.0035, + "num_tokens": 94449398.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9971372485160828, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0707092973588074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07629480492128732, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2091 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1360.9375, + "completions/mean_terminated_length": 1221.875, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.5231307826956739, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9431249848849155, + "kl": 0.0131072998046875, + "learning_rate": 5.925560008500087e-07, + "loss": 0.0014, + "num_tokens": 94502581.0, + "reward": 0.0, + "reward_std": 0.5498429536819458, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047670960302519924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08552437676078928, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2092 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1409.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1032.875, + "completions/mean_terminated_length": 1032.875, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.5233808452113028, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9043209892640323, + "kl": 0.01434326171875, + "learning_rate": 5.921649369321964e-07, + "loss": -0.0538, + "num_tokens": 94531827.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.032618522644043, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.054087304715290344, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12064382119274188, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2093 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1182.0, + "completions/mean_terminated_length": 1160.800048828125, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.5236309077269318, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1718130487801375, + "kl": 0.013824462890625, + "learning_rate": 5.917738408861037e-07, + "loss": -0.0012, + "num_tokens": 94577339.0, + "reward": 0.0, + "reward_std": 0.7127140760421753, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10579606005342122, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1441522096070042, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1177.0, + "completions/mean_length": 1229.1875, + "completions/mean_terminated_length": 1018.5555419921875, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.5238809702425606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0794469577406804, + "kl": 0.0170440673828125, + "learning_rate": 5.91382713009733e-07, + "loss": -0.0181, + "num_tokens": 94640278.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7212100625038147, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00515394418940481, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04819007316396134, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668902, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2095 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1308.375, + "completions/mean_terminated_length": 1159.3333740234375, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.5241310327581895, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9868428662865294, + "kl": 0.007049560546875, + "learning_rate": 5.909915536011106e-07, + "loss": 0.0199, + "num_tokens": 94686476.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6554123163223267, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0036148175643158677, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03870852634082781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1245.625, + "completions/mean_terminated_length": 1228.666748046875, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.5243810952738185, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.92406579237291, + "kl": 0.0149078369140625, + "learning_rate": 5.906003629582871e-07, + "loss": -0.0106, + "num_tokens": 94726150.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9010567665100098, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03514846533782064, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04810942674114972, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2097 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1259.0625, + "completions/mean_terminated_length": 1224.6429443359375, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.5246311577894474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4306725336064563, + "kl": 0.01898193359375, + "learning_rate": 5.902091413793366e-07, + "loss": -0.0133, + "num_tokens": 94782615.0, + "reward": 2.421438694000244e-08, + "reward_std": 1.0402885675430298, + "rewards/wordcountpos_reward_GEOBench/mean": 2.421438694000244e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03798819212107854, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09225491426990319, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2098 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1029.5625, + "completions/mean_terminated_length": 962.357177734375, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.5248812203050762, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6190898729292784, + "kl": 0.010101318359375, + "learning_rate": 5.898178891623572e-07, + "loss": -0.0987, + "num_tokens": 94817104.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0322155952453613, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03758123628337848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03339679429386938, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05692750425533111, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2099 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1088.0625, + "completions/mean_terminated_length": 950.75, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.5251312828207052, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4950164924150044, + "kl": 0.0163116455078125, + "learning_rate": 5.894266066054698e-07, + "loss": -0.0074, + "num_tokens": 94856025.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.31748268008232117, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23947386409958044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26412125341764586, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1255.1875, + "completions/mean_terminated_length": 1198.6923828125, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.5253813453363341, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.315700498190512, + "kl": 0.0142059326171875, + "learning_rate": 5.890352940068188e-07, + "loss": -0.0801, + "num_tokens": 94893972.0, + "reward": 0.0, + "reward_std": 0.5294963717460632, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04466736836085117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20866236028845844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042253, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1001.25, + "completions/mean_terminated_length": 930.0000610351562, + "completions/min_length": 458.0, + "completions/min_terminated_length": 458.0, + "epoch": 0.5256314078519629, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.923038913796182, + "kl": 0.019622802734375, + "learning_rate": 5.886439516645715e-07, + "loss": 0.0489, + "num_tokens": 94935784.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9429227113723755, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0490526632105144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07462090204196052, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1260.125, + "completions/mean_terminated_length": 1151.0909423828125, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.5258814703675919, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0686750859395993, + "kl": 0.0157470703125, + "learning_rate": 5.882525798769177e-07, + "loss": 0.0455, + "num_tokens": 94983354.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9055259227752686, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2547557693842341, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2766164459803254, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1212.75, + "completions/mean_terminated_length": 1193.60009765625, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.5261315328832208, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9890851457471377, + "kl": 0.018402099609375, + "learning_rate": 5.878611789420694e-07, + "loss": -0.0433, + "num_tokens": 95024238.0, + "reward": 0.0, + "reward_std": 0.9065369963645935, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024750508237654865, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05419993446198601, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1175.875, + "completions/mean_terminated_length": 1175.875, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.5263815953988498, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6655239548346734, + "kl": 0.00920867919921875, + "learning_rate": 5.874697491582616e-07, + "loss": -0.0446, + "num_tokens": 95081092.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5941611528396606, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.054143808021452786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07902763683148523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1039.0, + "completions/max_terminated_length": 1039.0, + "completions/mean_length": 773.0625, + "completions/mean_terminated_length": 773.0625, + "completions/min_length": 561.0, + "completions/min_terminated_length": 561.0, + "epoch": 0.5266316579144786, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.989716377537315, + "kl": 0.01177978515625, + "learning_rate": 5.870782908237506e-07, + "loss": -0.0085, + "num_tokens": 95117797.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0120573043823242, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05577900201000108, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12274118189187587, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1138.8125, + "completions/mean_terminated_length": 1114.7333984375, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.5268817204301075, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3702635814736124, + "kl": 0.019439697265625, + "learning_rate": 5.866868042368143e-07, + "loss": -0.0039, + "num_tokens": 95165466.0, + "reward": 0.0, + "reward_std": 1.053542137145996, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11484697533691474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14309523614118544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1116.625, + "completions/mean_terminated_length": 1091.0667724609375, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.5271317829457365, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3304177886161614, + "kl": 0.017913818359375, + "learning_rate": 5.86295289695753e-07, + "loss": -0.0379, + "num_tokens": 95199212.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0157506465911865, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017335147979470764, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07311891818507527, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14446581038560774, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1292.4375, + "completions/mean_terminated_length": 1131.0, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.5273818454613654, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.952287728494474, + "kl": 0.0123443603515625, + "learning_rate": 5.859037474988874e-07, + "loss": -0.0233, + "num_tokens": 95237603.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.045555591583252, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.321041669951908, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17234626406691755, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1292.8125, + "completions/mean_terminated_length": 1223.75, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.5276319079769942, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.126049166717576, + "kl": 0.0076904296875, + "learning_rate": 5.855121779445598e-07, + "loss": -0.0388, + "num_tokens": 95284392.0, + "reward": 0.0, + "reward_std": 0.9976513385772705, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030308080595724637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1048470970393048, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1238.375, + "completions/mean_terminated_length": 1178.0, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.5278819704926232, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0938917742936, + "kl": 0.0159912109375, + "learning_rate": 5.851205813311331e-07, + "loss": -0.0203, + "num_tokens": 95331758.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0539652109146118, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024747996289613296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05339751975288366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1193.4375, + "completions/mean_terminated_length": 1173.0001220703125, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.5281320330082521, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4663208512366994, + "kl": 0.0164337158203125, + "learning_rate": 5.84728957956991e-07, + "loss": -0.0093, + "num_tokens": 95383621.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9963194727897644, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0004930828043848701, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10848335640759374, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1192.875, + "completions/mean_terminated_length": 1172.4000244140625, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.5283820955238809, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.342267542446664, + "kl": 0.01953125, + "learning_rate": 5.843373081205372e-07, + "loss": -0.0064, + "num_tokens": 95434051.0, + "reward": 0.0, + "reward_std": 0.9884222149848938, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00884633052653356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18422769022189595, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1198.0, + "completions/max_terminated_length": 1198.0, + "completions/mean_length": 933.75, + "completions/mean_terminated_length": 933.75, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.5286321580395099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.290938139930958, + "kl": 0.0147247314453125, + "learning_rate": 5.839456321201962e-07, + "loss": -0.0029, + "num_tokens": 95484679.0, + "reward": 0.0, + "reward_std": 0.6961387395858765, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0857620265120534, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08473982390346961, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1352.625, + "completions/mean_terminated_length": 1303.5, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.5288822205551388, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.007458030635222, + "kl": 0.0139312744140625, + "learning_rate": 5.835539302544121e-07, + "loss": -0.0136, + "num_tokens": 95535481.0, + "reward": 0.0, + "reward_std": 0.662661075592041, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021088116005413906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10924743730124988, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1217.4375, + "completions/mean_terminated_length": 1177.071533203125, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.5291322830707677, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.955236995314679, + "kl": 0.0140533447265625, + "learning_rate": 5.831622028216484e-07, + "loss": -0.011, + "num_tokens": 95590520.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0657002925872803, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011092660712777855, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06040808927840702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14907119849998599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1112.0, + "completions/mean_terminated_length": 1086.1334228515625, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.5293823455863966, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1627187555404004, + "kl": 0.015594482421875, + "learning_rate": 5.827704501203887e-07, + "loss": -0.0336, + "num_tokens": 95636664.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.987285315990448, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1384949598792334, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0779595298161153, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1274.9375, + "completions/mean_terminated_length": 1223.0, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.5296324081020255, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6941616326938407, + "kl": 0.0107421875, + "learning_rate": 5.823786724491353e-07, + "loss": 0.0133, + "num_tokens": 95688959.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9890602827072144, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015746951996377028, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08934627226743584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1334.0, + "completions/mean_length": 1169.1875, + "completions/mean_terminated_length": 1147.1334228515625, + "completions/min_length": 1033.0, + "completions/min_terminated_length": 1033.0, + "epoch": 0.5298824706176544, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3189199580526885, + "kl": 0.009429931640625, + "learning_rate": 5.819868701064098e-07, + "loss": 0.0179, + "num_tokens": 95735930.0, + "reward": 0.0, + "reward_std": 0.9267513155937195, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06163411838538413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07511311844097791, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1187.0, + "completions/mean_length": 955.25, + "completions/mean_terminated_length": 918.9334106445312, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.5301325331332833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4350157266880044, + "kl": 0.0174102783203125, + "learning_rate": 5.815950433907527e-07, + "loss": 0.0248, + "num_tokens": 95760366.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8314720392227173, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019983346109611607, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04477408746828088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1322.0625, + "completions/mean_terminated_length": 1262.75, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.5303825956489122, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.038059139808116, + "kl": 0.01971435546875, + "learning_rate": 5.812031926007228e-07, + "loss": -0.0339, + "num_tokens": 95817991.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9316328167915344, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016573727632000986, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1048744889257379, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 961.3125, + "completions/mean_terminated_length": 961.3125, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.5306326581645411, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.592039320598154, + "kl": 0.011627197265625, + "learning_rate": 5.808113180348974e-07, + "loss": 0.0365, + "num_tokens": 95856532.0, + "reward": 0.0, + "reward_std": 0.8379043340682983, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.059858395380594696, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0711491773374034, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1266.8125, + "completions/mean_terminated_length": 1213.0, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.5308827206801701, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.518918680277145, + "kl": 0.022674560546875, + "learning_rate": 5.804194199918722e-07, + "loss": 0.025, + "num_tokens": 95914825.0, + "reward": 0.0, + "reward_std": 0.7380218505859375, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032738260919610226, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046074882772473735, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05163977794943227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1107.4375, + "completions/mean_terminated_length": 1051.357177734375, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.5311327831957989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4692705446158714, + "kl": 0.0167083740234375, + "learning_rate": 5.800274987702599e-07, + "loss": 0.0634, + "num_tokens": 95964216.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0596245527267456, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05435537089269006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1396581473418637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1319.5625, + "completions/mean_terminated_length": 1211.300048828125, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.5313828457114279, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0847967743775433, + "kl": 0.015777587890625, + "learning_rate": 5.796355546686916e-07, + "loss": 0.0378, + "num_tokens": 96016401.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8805702924728394, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06545756323275992, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0741521363412751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1225.1875, + "completions/mean_terminated_length": 1060.300048828125, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.5316329082270568, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6319395880178886, + "kl": 0.017913818359375, + "learning_rate": 5.792435879858162e-07, + "loss": 0.0322, + "num_tokens": 96055212.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.004536509513855, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029716077076366285, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06994206340367856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1281.3125, + "completions/mean_terminated_length": 1111.2222900390625, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.5318829707426856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7696337743980077, + "kl": 0.0155181884765625, + "learning_rate": 5.788515990202986e-07, + "loss": -0.0167, + "num_tokens": 96107905.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.35603436827659607, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049972418710818416, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05908804904317615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15770342536029575, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1020.5, + "completions/mean_terminated_length": 1020.5, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.5321330332583146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0313339519407774, + "kl": 0.0124359130859375, + "learning_rate": 5.784595880708217e-07, + "loss": -0.0545, + "num_tokens": 96140857.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9223519563674927, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04839123227872931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07295017654792045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1074.5, + "completions/mean_terminated_length": 1074.5, + "completions/min_length": 713.0, + "completions/min_terminated_length": 713.0, + "epoch": 0.5323830957739435, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.426542450562468, + "kl": 0.018096923828125, + "learning_rate": 5.78067555436085e-07, + "loss": -0.0194, + "num_tokens": 96183129.0, + "reward": 2.0489096641540527e-08, + "reward_std": 1.0086948871612549, + "rewards/wordcountpos_reward_GEOBench/mean": 2.0489096641540527e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033493403329727736, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13700541889504902, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1326.125, + "completions/mean_terminated_length": 1286.0, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.5326331582895724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.073613676964624, + "kl": 0.018310546875, + "learning_rate": 5.776755014148038e-07, + "loss": -0.0124, + "num_tokens": 96235803.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.963742733001709, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00938979499889004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05312970141392974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1035.4375, + "completions/mean_terminated_length": 1004.4667358398438, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.5328832208052013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.156636339518644, + "kl": 0.0144805908203125, + "learning_rate": 5.772834263057107e-07, + "loss": -0.0988, + "num_tokens": 96278906.0, + "reward": 0.0, + "reward_std": 0.9048461318016052, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08433183643258553, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09044828882033666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1131.125, + "completions/mean_terminated_length": 1131.125, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.5331332833208302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.491987287337192, + "kl": 0.0162506103515625, + "learning_rate": 5.768913304075536e-07, + "loss": -0.0147, + "num_tokens": 96334020.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4309302568435669, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028931663928374367, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2530565700276155, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1238.1875, + "completions/mean_terminated_length": 1238.1875, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.5333833458364591, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1292768717297763, + "kl": 0.01385498046875, + "learning_rate": 5.764992140190968e-07, + "loss": 0.0189, + "num_tokens": 96376815.0, + "reward": -3.725290298461914e-08, + "reward_std": 0.9473576545715332, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011741936247729413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06531515228086905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1248.0, + "completions/mean_length": 1142.6875, + "completions/mean_terminated_length": 1060.2308349609375, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.5336334083520881, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1703394591163505, + "kl": 0.00959014892578125, + "learning_rate": 5.761070774391198e-07, + "loss": 0.1002, + "num_tokens": 96432954.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9931933879852295, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09411347814528143, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05133212494317107, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1203.5625, + "completions/mean_terminated_length": 1203.5625, + "completions/min_length": 1020.0, + "completions/min_terminated_length": 1020.0, + "epoch": 0.5338834708677169, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1373070625948496, + "kl": 0.01910400390625, + "learning_rate": 5.757149209664177e-07, + "loss": -0.0028, + "num_tokens": 96483011.0, + "reward": -1.862645149230957e-09, + "reward_std": 0.9772847890853882, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.057340451233859994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07171892451067592, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1078.375, + "completions/mean_terminated_length": 1050.2667236328125, + "completions/min_length": 615.0, + "completions/min_terminated_length": 615.0, + "epoch": 0.5341335333833458, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1899348424185923, + "kl": 0.02020263671875, + "learning_rate": 5.753227448998006e-07, + "loss": -0.0108, + "num_tokens": 96537433.0, + "reward": 0.0, + "reward_std": 0.665459156036377, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003820830604734744, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059700423432697367, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066469, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1188.375, + "completions/mean_terminated_length": 1167.60009765625, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.5343835958989748, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8728462751930652, + "kl": 0.01361083984375, + "learning_rate": 5.749305495380936e-07, + "loss": -0.0627, + "num_tokens": 96575343.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9731511473655701, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1021588526323294, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12008123759350887, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14497764834110988, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1149.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 957.3125, + "completions/mean_terminated_length": 957.3125, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.5346336584146036, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.046806129319279, + "kl": 0.0103302001953125, + "learning_rate": 5.745383351801368e-07, + "loss": -0.0192, + "num_tokens": 96616580.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.924839198589325, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008753336252423279, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058434693321256406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05708992257184505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1250.6875, + "completions/mean_terminated_length": 1215.071533203125, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.5348837209302325, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.944145491103988, + "kl": 0.0158538818359375, + "learning_rate": 5.741461021247842e-07, + "loss": -0.0156, + "num_tokens": 96664719.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5610970854759216, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015398321200014393, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1331028044803799, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1295.25, + "completions/mean_terminated_length": 1266.0, + "completions/min_length": 1176.0, + "completions/min_terminated_length": 1176.0, + "epoch": 0.5351337834458615, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.657167143287944, + "kl": 0.01218414306640625, + "learning_rate": 5.737538506709045e-07, + "loss": -0.0154, + "num_tokens": 96717603.0, + "reward": 0.0, + "reward_std": 0.4168713092803955, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046298457273038454, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10949769174541706, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1069.5, + "completions/mean_terminated_length": 1069.5, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.5353838459614904, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8505354175504904, + "kl": 0.0123291015625, + "learning_rate": 5.733615811173803e-07, + "loss": 0.0259, + "num_tokens": 96763667.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9182066917419434, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02087911774594804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0580843221835424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1188.875, + "completions/mean_terminated_length": 946.888916015625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.5356339084771192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.096274046318493, + "kl": 0.01397705078125, + "learning_rate": 5.729692937631078e-07, + "loss": -0.0134, + "num_tokens": 96808657.0, + "reward": 0.0, + "reward_std": 0.6443644762039185, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017602892770477534, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02564974079527656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1264911064067352, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1162.8125, + "completions/mean_terminated_length": 1162.8125, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.5358839709927482, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7713169810237344, + "kl": 0.0148773193359375, + "learning_rate": 5.72576988906997e-07, + "loss": 0.009, + "num_tokens": 96843726.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.39237844944000244, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07498306801612342, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07533939630386498, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16865480854231357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1284.625, + "completions/mean_terminated_length": 1234.923095703125, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.5361340335083771, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.416431471914969, + "kl": 0.009918212890625, + "learning_rate": 5.721846668479712e-07, + "loss": -0.009, + "num_tokens": 96889448.0, + "reward": 0.0, + "reward_std": 1.0544312000274658, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04509657907513212, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.075009397973395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1171.5625, + "completions/mean_terminated_length": 1124.6429443359375, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.536384096024006, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.931633817302061, + "kl": 0.0140533447265625, + "learning_rate": 5.717923278849668e-07, + "loss": 0.0192, + "num_tokens": 96941393.0, + "reward": 0.0, + "reward_std": 0.950148344039917, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15199080423540196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15845224266275332, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042258, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1217.75, + "completions/mean_terminated_length": 1177.4285888671875, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.5366341585396349, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3958563034878257, + "kl": 0.013092041015625, + "learning_rate": 5.713999723169329e-07, + "loss": 0.002, + "num_tokens": 96974605.0, + "reward": 0.0, + "reward_std": 0.635490894317627, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.23950739798562162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23369776102810835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1389.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1114.375, + "completions/mean_terminated_length": 1114.375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.5368842210552638, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0880596920512318, + "kl": 0.0158843994140625, + "learning_rate": 5.710076004428316e-07, + "loss": -0.0522, + "num_tokens": 97026227.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6089726686477661, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11593161261927663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08714987223322755, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1462.5, + "completions/mean_terminated_length": 1350.0, + "completions/min_length": 1213.0, + "completions/min_terminated_length": 1213.0, + "epoch": 0.5371342835708928, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2782217294971363, + "kl": 0.00984954833984375, + "learning_rate": 5.70615212561637e-07, + "loss": -0.0087, + "num_tokens": 97085227.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0206867456436157, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021754944359913424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08061440145741218, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1096.9375, + "completions/mean_terminated_length": 1096.9375, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.5373843460865216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7046992986891594, + "kl": 0.02056884765625, + "learning_rate": 5.702228089723358e-07, + "loss": 0.0017, + "num_tokens": 97136562.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9506425261497498, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012975736174272862, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049023746670118336, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725113, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1298.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 962.625, + "completions/mean_terminated_length": 962.625, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.5376344086021505, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.391091476866148, + "kl": 0.01617431640625, + "learning_rate": 5.698303899739264e-07, + "loss": -0.0101, + "num_tokens": 97171108.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9838425517082214, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08825477854637424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09820840997870932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1249.5625, + "completions/mean_terminated_length": 1099.300048828125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.5378844711177795, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.572550380742959, + "kl": 0.0113525390625, + "learning_rate": 5.694379558654191e-07, + "loss": -0.0002, + "num_tokens": 97225709.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8912241458892822, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006584418370277376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058964720394547684, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1355.5625, + "completions/mean_terminated_length": 1307.416748046875, + "completions/min_length": 1178.0, + "completions/min_terminated_length": 1178.0, + "epoch": 0.5381345336334084, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9441267127699637, + "kl": 0.01544189453125, + "learning_rate": 5.690455069458354e-07, + "loss": -0.0012, + "num_tokens": 97279838.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.017106056213379, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021616997310864392, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13582365813089936, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666671, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1268.375, + "completions/mean_terminated_length": 1129.4000244140625, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.5383845961490372, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.918799712254571, + "kl": 0.00665283203125, + "learning_rate": 5.686530435142088e-07, + "loss": -0.0114, + "num_tokens": 97323156.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6883466243743896, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06436795598309576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16527239069077396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1320.6875, + "completions/mean_terminated_length": 1279.3077392578125, + "completions/min_length": 654.0, + "completions/min_terminated_length": 654.0, + "epoch": 0.5386346586646662, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.818602973940923, + "kl": 0.0180511474609375, + "learning_rate": 5.682605658695831e-07, + "loss": -0.006, + "num_tokens": 97382703.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9874147176742554, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023440432024345148, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20013684033391563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04868644955601477, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1137.5, + "completions/mean_terminated_length": 1113.3333740234375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.5388847211802951, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4725883841405736, + "kl": 0.017578125, + "learning_rate": 5.678680743110132e-07, + "loss": -0.0225, + "num_tokens": 97428007.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9608802795410156, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012222864873974365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07934830540014133, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1202.0, + "completions/max_terminated_length": 1202.0, + "completions/mean_length": 1068.0625, + "completions/mean_terminated_length": 1068.0625, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.5391347836959239, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9956068844168824, + "kl": 0.00870513916015625, + "learning_rate": 5.674755691375646e-07, + "loss": -0.0194, + "num_tokens": 97467080.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9522291421890259, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.23626398814662455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10592652613121799, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1067.4375, + "completions/mean_terminated_length": 1067.4375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.5393848462115529, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.998240467140788, + "kl": 0.0133209228515625, + "learning_rate": 5.670830506483132e-07, + "loss": -0.078, + "num_tokens": 97505175.0, + "reward": 0.0, + "reward_std": 0.7353177070617676, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2623618275036807, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2234553950298837, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1328.375, + "completions/mean_terminated_length": 1271.166748046875, + "completions/min_length": 1092.0, + "completions/min_terminated_length": 1092.0, + "epoch": 0.5396349087271818, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4384654131782377, + "kl": 0.01654052734375, + "learning_rate": 5.666905191423452e-07, + "loss": -0.0195, + "num_tokens": 97545541.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9737201929092407, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0729949527122182, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14741402822672833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1356.5, + "completions/mean_terminated_length": 1323.3846435546875, + "completions/min_length": 1092.0, + "completions/min_terminated_length": 1092.0, + "epoch": 0.5398849712428107, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3309971397485776, + "kl": 0.0106964111328125, + "learning_rate": 5.662979749187565e-07, + "loss": 0.0018, + "num_tokens": 97592045.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6566680073738098, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05604309323627635, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10449316621336922, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1202.3125, + "completions/mean_terminated_length": 1182.4666748046875, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.5401350337584396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3185036737130966, + "kl": 0.016845703125, + "learning_rate": 5.659054182766525e-07, + "loss": -0.0024, + "num_tokens": 97635946.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0639147758483887, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014636424853809268, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039270181855829735, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1479.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1168.3125, + "completions/mean_terminated_length": 1168.3125, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.5403850962740685, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.023036742943549, + "kl": 0.015594482421875, + "learning_rate": 5.655128495151488e-07, + "loss": -0.0301, + "num_tokens": 97688495.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0047645568847656, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04678170853144583, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07075273293334725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901859, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1202.875, + "completions/mean_terminated_length": 1183.0667724609375, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.5406351587896975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2875463617188396, + "kl": 0.01593017578125, + "learning_rate": 5.651202689333691e-07, + "loss": -0.023, + "num_tokens": 97738021.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.058957576751709, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03015498924320164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0485747202655981, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1244.0, + "completions/mean_terminated_length": 1127.6363525390625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.5408852213053263, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.289033175199065, + "kl": 0.010406494140625, + "learning_rate": 5.647276768304473e-07, + "loss": -0.0095, + "num_tokens": 97790149.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.014935851097107, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017028344525780403, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05036580597881235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1209.0, + "completions/mean_terminated_length": 1141.84619140625, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.5411352838209552, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4510310350289766, + "kl": 0.00737762451171875, + "learning_rate": 5.643350735055256e-07, + "loss": -0.0029, + "num_tokens": 97849117.0, + "reward": 0.0, + "reward_std": 0.950165867805481, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09524740760948304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11306978672339087, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 964.0625, + "completions/mean_terminated_length": 964.0625, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.5413853463365842, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.59598703682898, + "kl": 0.01812744140625, + "learning_rate": 5.639424592577541e-07, + "loss": -0.0613, + "num_tokens": 97890878.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9934078454971313, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00011817353413954429, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03791059104054087, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12758439472669758, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1222.5625, + "completions/mean_terminated_length": 1158.5384521484375, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.5416354088522131, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.811493096603487, + "kl": 0.0145416259765625, + "learning_rate": 5.635498343862923e-07, + "loss": 0.0012, + "num_tokens": 97930431.0, + "reward": 0.0, + "reward_std": 0.7393553853034973, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058263407950424444, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08753367981221583, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1173.875, + "completions/mean_terminated_length": 1098.615478515625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.5418854713678419, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0616837299806425, + "kl": 0.015350341796875, + "learning_rate": 5.63157199190307e-07, + "loss": 0.0165, + "num_tokens": 97978525.0, + "reward": 0.0, + "reward_std": 0.7222151160240173, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06762987505533335, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1016.0, + "completions/mean_terminated_length": 1016.0, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.5421355338834709, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1025591798379244, + "kl": 0.01171875, + "learning_rate": 5.627645539689734e-07, + "loss": -0.0142, + "num_tokens": 98024909.0, + "reward": 0.0, + "reward_std": 0.5591291189193726, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011547890782979912, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09764705647999833, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1429.5625, + "completions/mean_terminated_length": 1374.77783203125, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.5423855963990998, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6725930935684463, + "kl": 0.0154571533203125, + "learning_rate": 5.623718990214739e-07, + "loss": -0.0166, + "num_tokens": 98079782.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9523901343345642, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.059018044269244785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09151773564939282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17841898254763516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1430.125, + "completions/mean_terminated_length": 1375.77783203125, + "completions/min_length": 1258.0, + "completions/min_terminated_length": 1258.0, + "epoch": 0.5426356589147286, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6324586670955488, + "kl": 0.0155029296875, + "learning_rate": 5.619792346469987e-07, + "loss": -0.0099, + "num_tokens": 98140896.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8006809949874878, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.092868440891541, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12004311373599023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1210.0625, + "completions/mean_terminated_length": 1078.272705078125, + "completions/min_length": 457.0, + "completions/min_terminated_length": 457.0, + "epoch": 0.5428857214303576, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4992262492773865, + "kl": 0.019256591796875, + "learning_rate": 5.615865611447449e-07, + "loss": 0.0014, + "num_tokens": 98198233.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0072968006134033, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040624474941679214, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07092028843983031, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1127.125, + "completions/mean_terminated_length": 1073.857177734375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.5431357839459865, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5697840002372505, + "kl": 0.0120849609375, + "learning_rate": 5.611938788139167e-07, + "loss": 0.0103, + "num_tokens": 98240347.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9868322014808655, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08767984084267003, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08549264093803663, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1186.375, + "completions/mean_terminated_length": 998.2000122070312, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.5433858464616154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1369446099544818, + "kl": 0.0171966552734375, + "learning_rate": 5.60801187953725e-07, + "loss": 0.0969, + "num_tokens": 98283577.0, + "reward": 0.0, + "reward_std": 0.9208587408065796, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11678349187013996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16918663860510996, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1217.0, + "completions/max_terminated_length": 1217.0, + "completions/mean_length": 1010.0625, + "completions/mean_terminated_length": 1010.0625, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.5436359089772443, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6765102589773635, + "kl": 0.02197265625, + "learning_rate": 5.60408488863387e-07, + "loss": 0.0301, + "num_tokens": 98317282.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.5761712789535522, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10558397039348354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15153376522662437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1377.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1006.5625, + "completions/mean_terminated_length": 1006.5625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.5438859714928732, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.836982735521804, + "kl": 0.0155792236328125, + "learning_rate": 5.600157818421266e-07, + "loss": -0.0163, + "num_tokens": 98356683.0, + "reward": 0.0, + "reward_std": 1.0066277980804443, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10214140118954834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09498239591090576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1208.1875, + "completions/mean_terminated_length": 1166.5, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.5441360340085021, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3790503439576174, + "kl": 0.009490966796875, + "learning_rate": 5.596230671891733e-07, + "loss": -0.0304, + "num_tokens": 98403990.0, + "reward": 0.0, + "reward_std": 0.777179479598999, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05337130749686147, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10624724075933169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1244.8125, + "completions/mean_terminated_length": 1208.357177734375, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.5443860965241311, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.95521013484897, + "kl": 0.0160064697265625, + "learning_rate": 5.592303452037628e-07, + "loss": -0.0385, + "num_tokens": 98452403.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9106088280677795, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1086546308888241, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07892185093349009, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 960.1875, + "completions/mean_terminated_length": 960.1875, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.5446361590397599, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8351597815825755, + "kl": 0.0167236328125, + "learning_rate": 5.58837616185136e-07, + "loss": -0.0657, + "num_tokens": 98487014.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4740613102912903, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01069223005511357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.37442248375961673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15957118462605635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1375.625, + "completions/mean_terminated_length": 1301.0, + "completions/min_length": 1183.0, + "completions/min_terminated_length": 1183.0, + "epoch": 0.5448862215553888, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7191150844113716, + "kl": 0.0107879638671875, + "learning_rate": 5.584448804325396e-07, + "loss": -0.0041, + "num_tokens": 98545520.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0373358726501465, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028799693187164542, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11985803397740251, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05962847939999443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1100.8125, + "completions/mean_terminated_length": 1100.8125, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.5451362840710178, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.103774055037145, + "kl": 0.0158538818359375, + "learning_rate": 5.580521382452249e-07, + "loss": -0.0358, + "num_tokens": 98597341.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0220592021942139, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07419990140109364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1315192776658885, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1075.9375, + "completions/mean_terminated_length": 1047.666748046875, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.5453863465866466, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3847744357084193, + "kl": 0.00750732421875, + "learning_rate": 5.576593899224486e-07, + "loss": 0.0288, + "num_tokens": 98639396.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.770439863204956, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10191779948829818, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0823339489765876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1414.8125, + "completions/mean_terminated_length": 1305.2857666015625, + "completions/min_length": 1118.0, + "completions/min_terminated_length": 1118.0, + "epoch": 0.5456364091022756, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.136467926695752, + "kl": 0.008209228515625, + "learning_rate": 5.572666357634719e-07, + "loss": -0.0144, + "num_tokens": 98683305.0, + "reward": 0.0, + "reward_std": 0.8859795928001404, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0073212973139351704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05861057177975828, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1305.125, + "completions/mean_terminated_length": 1292.1334228515625, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.5458864716179045, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.257810403417449, + "kl": 0.0157928466796875, + "learning_rate": 5.568738760675602e-07, + "loss": 0.025, + "num_tokens": 98728915.0, + "reward": 0.0, + "reward_std": 0.900383710861206, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05338602001324473, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15026013988084036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1174.25, + "completions/mean_terminated_length": 1152.533447265625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.5461365341335334, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.998593283418261, + "kl": 0.0159912109375, + "learning_rate": 5.564811111339833e-07, + "loss": -0.0219, + "num_tokens": 98778703.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0124942064285278, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034329429986482564, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04561792829472876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1500617156989701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1169.0, + "completions/max_terminated_length": 1169.0, + "completions/mean_length": 937.9375, + "completions/mean_terminated_length": 937.9375, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.5463865966491623, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9355382659056026, + "kl": 0.0171356201171875, + "learning_rate": 5.560883412620155e-07, + "loss": -0.0247, + "num_tokens": 98811822.0, + "reward": 0.0, + "reward_std": 0.8736337423324585, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1116862466385613, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09562280196413768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1319.125, + "completions/mean_terminated_length": 1293.2857666015625, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.5466366591647912, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.523345495347882, + "kl": 0.0125732421875, + "learning_rate": 5.556955667509337e-07, + "loss": -0.0193, + "num_tokens": 98851176.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7508247494697571, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03388760967883839, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06346120926244134, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1204.6875, + "completions/mean_terminated_length": 975.0, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.5468867216804201, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8623692564979524, + "kl": 0.0097808837890625, + "learning_rate": 5.553027879000196e-07, + "loss": 0.0164, + "num_tokens": 98891091.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9989153146743774, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0017892399370054001, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022343742258856075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1205.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 983.25, + "completions/mean_terminated_length": 983.25, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.547136784196049, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1422765895293816, + "kl": 0.0131683349609375, + "learning_rate": 5.549100050085573e-07, + "loss": -0.0357, + "num_tokens": 98923383.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5901523232460022, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011622042773192532, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022099447253383368, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1101.0625, + "completions/mean_terminated_length": 1074.4666748046875, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.5473868467116779, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.613957809991019, + "kl": 0.012603759765625, + "learning_rate": 5.545172183758346e-07, + "loss": -0.0365, + "num_tokens": 98966960.0, + "reward": 0.0, + "reward_std": 0.8043425679206848, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09507753363138474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1633728892078392, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 883.75, + "completions/mean_terminated_length": 883.75, + "completions/min_length": 441.0, + "completions/min_terminated_length": 441.0, + "epoch": 0.5476369092273068, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6099919741128423, + "kl": 0.014495849609375, + "learning_rate": 5.541244283011413e-07, + "loss": -0.0142, + "num_tokens": 99002420.0, + "reward": 0.0, + "reward_std": 0.720859169960022, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07647967639570559, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12028362298326215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1120.375, + "completions/mean_terminated_length": 1120.375, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.5478869717429358, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1335625688333386, + "kl": 0.0138397216796875, + "learning_rate": 5.537316350837711e-07, + "loss": 0.0047, + "num_tokens": 99053210.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0336947441101074, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04853649343833746, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07653229977530235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1053.75, + "completions/mean_terminated_length": 990.0000610351562, + "completions/min_length": 761.0, + "completions/min_terminated_length": 761.0, + "epoch": 0.5481370342585646, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.830027391801516, + "kl": 0.01245880126953125, + "learning_rate": 5.533388390230188e-07, + "loss": -0.0405, + "num_tokens": 99087886.0, + "reward": 0.0, + "reward_std": 0.6475691795349121, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02406177578910133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24912000709607987, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1233.1875, + "completions/mean_terminated_length": 1144.25, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.5483870967741935, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.907732701138131, + "kl": 0.0135345458984375, + "learning_rate": 5.529460404181821e-07, + "loss": -0.011, + "num_tokens": 99140209.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.38997820019721985, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14245760913507274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1542015309423243, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 1096.875, + "completions/mean_terminated_length": 1096.875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.5486371592898225, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.403523606177439, + "kl": 0.017547607421875, + "learning_rate": 5.525532395685608e-07, + "loss": -0.0107, + "num_tokens": 99182807.0, + "reward": 0.0, + "reward_std": 0.8399726152420044, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1150.0, + "completions/max_terminated_length": 1150.0, + "completions/mean_length": 892.5, + "completions/mean_terminated_length": 892.5, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.5488872218054514, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.575536491846624, + "kl": 0.0163116455078125, + "learning_rate": 5.521604367734557e-07, + "loss": 0.0209, + "num_tokens": 99225143.0, + "reward": 0.0, + "reward_std": 0.40306729078292847, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0009635262153707166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0791939367091561, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1146.0, + "completions/mean_length": 928.5625, + "completions/mean_terminated_length": 890.4667358398438, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.5491372843210802, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1520216938095578, + "kl": 0.01703643798828125, + "learning_rate": 5.517676323321695e-07, + "loss": -0.015, + "num_tokens": 99264880.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0328587293624878, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16177095540844585, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.087976538679376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1285.5, + "completions/mean_terminated_length": 1156.800048828125, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.5493873468367092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.670957151744284, + "kl": 0.0129852294921875, + "learning_rate": 5.513748265440064e-07, + "loss": 0.0131, + "num_tokens": 99317896.0, + "reward": 0.0, + "reward_std": 0.4839158058166504, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1075.0625, + "completions/mean_terminated_length": 1046.7333984375, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.5496374093523381, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1538458877122344, + "kl": 0.0121917724609375, + "learning_rate": 5.509820197082709e-07, + "loss": -0.0631, + "num_tokens": 99367113.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.55048006772995, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1950784335224227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2224629128098234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1211.5625, + "completions/mean_terminated_length": 1192.3333740234375, + "completions/min_length": 568.0, + "completions/min_terminated_length": 568.0, + "epoch": 0.5498874718679669, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7906266718612547, + "kl": 0.0145416259765625, + "learning_rate": 5.505892121242694e-07, + "loss": -0.0543, + "num_tokens": 99427730.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9331574440002441, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11700413133502796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10974101467795118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1223.0, + "completions/mean_terminated_length": 1056.800048828125, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.5501375343835959, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4588782505568223, + "kl": 0.012115478515625, + "learning_rate": 5.501964040913076e-07, + "loss": 0.0174, + "num_tokens": 99478546.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6075373888015747, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09008241162378418, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08835303162298577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1280.75, + "completions/mean_terminated_length": 1266.1334228515625, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.5503875968992248, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2226132091666426, + "kl": 0.009613037109375, + "learning_rate": 5.498035959086923e-07, + "loss": 0.0064, + "num_tokens": 99525846.0, + "reward": 0.0, + "reward_std": 0.6774532794952393, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11173171495464321, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05657687157406909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1147.0, + "completions/max_terminated_length": 1147.0, + "completions/mean_length": 971.0, + "completions/mean_terminated_length": 971.0, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.5506376594148538, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6698230171079356, + "kl": 0.018218994140625, + "learning_rate": 5.494107878757307e-07, + "loss": 0.0126, + "num_tokens": 99563142.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0493272542953491, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04862956157977817, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05876339401098231, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1377.4375, + "completions/mean_terminated_length": 1219.857177734375, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.5508877219304826, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8241253421828785, + "kl": 0.0149993896484375, + "learning_rate": 5.490179802917291e-07, + "loss": 0.0159, + "num_tokens": 99625573.0, + "reward": 0.0, + "reward_std": 0.9704320430755615, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08352721441424088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0544638569322449, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12531441937663718, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1056.4375, + "completions/mean_terminated_length": 908.5833740234375, + "completions/min_length": 527.0, + "completions/min_terminated_length": 527.0, + "epoch": 0.5511377844461115, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4714087649467817, + "kl": 0.0124664306640625, + "learning_rate": 5.486251734559938e-07, + "loss": -0.0574, + "num_tokens": 99671068.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9304741621017456, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10552114914984852, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0962735628441722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1159.8125, + "completions/mean_terminated_length": 1081.3077392578125, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.5513878469617405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2348222467785996, + "kl": 0.01678466796875, + "learning_rate": 5.482323676678305e-07, + "loss": 0.004, + "num_tokens": 99711705.0, + "reward": 0.0, + "reward_std": 0.9893856048583984, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008296251788859104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.033185007155436416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1311.75, + "completions/mean_terminated_length": 1249.0, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.5516379094773693, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.44242943376494, + "kl": 0.01446533203125, + "learning_rate": 5.478395632265445e-07, + "loss": -0.1033, + "num_tokens": 99768853.0, + "reward": 0.0, + "reward_std": 0.7662843465805054, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05926329177006571, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05895978516265932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1069.125, + "completions/mean_terminated_length": 1069.125, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.5518879719929982, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7584215614404877, + "kl": 0.011810302734375, + "learning_rate": 5.474467604314393e-07, + "loss": -0.0038, + "num_tokens": 99809039.0, + "reward": 0.0, + "reward_std": 0.9367978572845459, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0031452772233621726, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03797604494590243, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1425.6875, + "completions/mean_terminated_length": 1330.1429443359375, + "completions/min_length": 1173.0, + "completions/min_terminated_length": 1173.0, + "epoch": 0.5521380345086272, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5628425848099345, + "kl": 0.0128326416015625, + "learning_rate": 5.470539595818178e-07, + "loss": -0.003, + "num_tokens": 99860602.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0165280103683472, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1787134123376647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14937708476560116, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.050000000000000024, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1175.75, + "completions/mean_terminated_length": 1129.4285888671875, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.5523880970242561, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.285682749729301, + "kl": 0.0175933837890625, + "learning_rate": 5.466611609769813e-07, + "loss": -0.0058, + "num_tokens": 99904366.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.920250654220581, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03138679106414955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04273423101647875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1152.25, + "completions/mean_terminated_length": 1152.25, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.5526381595398849, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4972387633529456, + "kl": 0.0151214599609375, + "learning_rate": 5.462683649162291e-07, + "loss": -0.0244, + "num_tokens": 99950922.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9868754744529724, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030778555481612575, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.038327756767597636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1213.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 947.375, + "completions/mean_terminated_length": 947.375, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.5528882220555139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3134826063299254, + "kl": 0.0173797607421875, + "learning_rate": 5.458755716988587e-07, + "loss": 0.0168, + "num_tokens": 99986232.0, + "reward": 0.0, + "reward_std": 0.9486784934997559, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08860236318482177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1754499113286534, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1366.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1052.6875, + "completions/mean_terminated_length": 1052.6875, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.5531382845711428, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5404343484019276, + "kl": 0.01971435546875, + "learning_rate": 5.454827816241655e-07, + "loss": 0.0239, + "num_tokens": 100025251.0, + "reward": 0.0, + "reward_std": 0.9478340148925781, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07096016461077914, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06759561198695806, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1220.25, + "completions/mean_terminated_length": 1220.25, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.5533883470867716, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6147009543798445, + "kl": 0.015777587890625, + "learning_rate": 5.450899949914427e-07, + "loss": -0.0423, + "num_tokens": 100075279.0, + "reward": 0.0, + "reward_std": 0.8987574577331543, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023812503668722868, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08156158140905813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1059.4375, + "completions/mean_terminated_length": 1030.0667724609375, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.5536384096024006, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.458116687083742, + "kl": 0.0177764892578125, + "learning_rate": 5.446972120999804e-07, + "loss": -0.0283, + "num_tokens": 100109558.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0314393043518066, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02304417337013977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07854007465676258, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1169.5625, + "completions/mean_terminated_length": 1093.3077392578125, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.5538884721180295, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.134798773391169, + "kl": 0.0142822265625, + "learning_rate": 5.443044332490661e-07, + "loss": -0.0545, + "num_tokens": 100155311.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5091114044189453, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007564079941202218, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12631002035853245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05374838498865703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1328.8125, + "completions/mean_terminated_length": 1289.3077392578125, + "completions/min_length": 1095.0, + "completions/min_terminated_length": 1095.0, + "epoch": 0.5541385346336584, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.929540799874657, + "kl": 0.01513671875, + "learning_rate": 5.439116587379846e-07, + "loss": 0.0013, + "num_tokens": 100210332.0, + "reward": 0.0, + "reward_std": 0.8535051941871643, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05134317845005829, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08569675692734699, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1319.25, + "completions/mean_terminated_length": 1210.800048828125, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.5543885971492873, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.532286367803046, + "kl": 0.0124053955078125, + "learning_rate": 5.435188888660167e-07, + "loss": -0.0363, + "num_tokens": 100266520.0, + "reward": 0.0, + "reward_std": 0.9152195453643799, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0008130262413936443, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10378159855266465, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1164.3125, + "completions/mean_terminated_length": 1164.3125, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.5546386596649162, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.639471937027752, + "kl": 0.01070404052734375, + "learning_rate": 5.431261239324401e-07, + "loss": -0.0207, + "num_tokens": 100306213.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6413066387176514, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0629386295845636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12869680519442664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1151.1875, + "completions/mean_terminated_length": 1101.357177734375, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.5548887221805452, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.433173022315902, + "kl": 0.0108642578125, + "learning_rate": 5.427333642365281e-07, + "loss": -0.03, + "num_tokens": 100362064.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.93462735414505, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.37755792342652, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2995848796044446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1467.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1259.8125, + "completions/mean_terminated_length": 1259.8125, + "completions/min_length": 1050.0, + "completions/min_terminated_length": 1050.0, + "epoch": 0.5551387846961741, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6724391797788782, + "kl": 0.013824462890625, + "learning_rate": 5.423406100775515e-07, + "loss": 0.0125, + "num_tokens": 100403309.0, + "reward": 0.0, + "reward_std": 0.6519013047218323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07515108254099366, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1306111662577058, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1198.6875, + "completions/mean_terminated_length": 1155.6429443359375, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.5553888472118029, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.277469932452028, + "kl": 0.0165863037109375, + "learning_rate": 5.419478617547751e-07, + "loss": -0.0138, + "num_tokens": 100449184.0, + "reward": 0.0, + "reward_std": 0.5970100164413452, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03915640083351985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048189338308492057, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1215.0, + "completions/mean_terminated_length": 1196.0001220703125, + "completions/min_length": 1050.0, + "completions/min_terminated_length": 1050.0, + "epoch": 0.5556389097274319, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.694601925840056, + "kl": 0.012908935546875, + "learning_rate": 5.415551195674604e-07, + "loss": -0.0029, + "num_tokens": 100497312.0, + "reward": 0.0, + "reward_std": 0.529692530632019, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1632645956516643, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16395618482692395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 949.5625, + "completions/mean_terminated_length": 912.86669921875, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.5558889722430608, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.319980719268365, + "kl": 0.018585205078125, + "learning_rate": 5.41162383814864e-07, + "loss": -0.0094, + "num_tokens": 100528009.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.943160891532898, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010297313234024524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01769572868539106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14548768561863465, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1309.5625, + "completions/mean_terminated_length": 1246.0833740234375, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.5561390347586896, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.974750634247963, + "kl": 0.016571044921875, + "learning_rate": 5.407696547962372e-07, + "loss": -0.0561, + "num_tokens": 100578746.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.066868543624878, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04224811914164518, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09841251133881282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1180.875, + "completions/mean_terminated_length": 1180.875, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.5563890972743186, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.951081903718793, + "kl": 0.015777587890625, + "learning_rate": 5.403769328108267e-07, + "loss": 0.0137, + "num_tokens": 100625424.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9850627183914185, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035896797435303295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05816875796864235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1178.4375, + "completions/mean_terminated_length": 1104.2308349609375, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.5566391597899475, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3984652125043633, + "kl": 0.0149078369140625, + "learning_rate": 5.399842181578735e-07, + "loss": 0.0006, + "num_tokens": 100679183.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8644700050354004, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008903081055187438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03715751344842853, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1157.5625, + "completions/mean_terminated_length": 1001.9091186523438, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.5568892223055764, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5959274631503932, + "kl": 0.009124755859375, + "learning_rate": 5.395915111366131e-07, + "loss": -0.0115, + "num_tokens": 100725464.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8450931310653687, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013338336716269888, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08966272798136159, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1057.8125, + "completions/mean_terminated_length": 1057.8125, + "completions/min_length": 526.0, + "completions/min_terminated_length": 526.0, + "epoch": 0.5571392848212053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6169571880668316, + "kl": 0.0134429931640625, + "learning_rate": 5.391988120462752e-07, + "loss": 0.0227, + "num_tokens": 100772781.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9833099842071533, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04551106588525325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13161937864582823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.077817450199525, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 1500.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 1500.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.5573893473368342, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.318814921724928, + "kl": 0.0117645263671875, + "learning_rate": 5.388061211860834e-07, + "loss": 0.0005, + "num_tokens": 100831821.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.761570155620575, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0689358887542665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10464738546063387, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1033.0625, + "completions/mean_terminated_length": 966.357177734375, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.5576394098524631, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7544403770441463, + "kl": 0.022735595703125, + "learning_rate": 5.384134388552551e-07, + "loss": -0.1363, + "num_tokens": 100873086.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8601086139678955, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09180365547227345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16819046411070823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1279.6875, + "completions/mean_terminated_length": 1228.84619140625, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.557889472368092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9658076141867484, + "kl": 0.0174102783203125, + "learning_rate": 5.380207653530013e-07, + "loss": 0.0155, + "num_tokens": 100910249.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7888146638870239, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007422364108411909, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0660648850387417, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1143.5625, + "completions/mean_terminated_length": 1119.800048828125, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.5581395348837209, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.259987238435647, + "kl": 0.0157623291015625, + "learning_rate": 5.376281009785262e-07, + "loss": -0.0126, + "num_tokens": 100948210.0, + "reward": 0.0, + "reward_std": 0.8412267565727234, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012878608353319231, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06965856117235095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1050.75, + "completions/mean_terminated_length": 947.0769653320312, + "completions/min_length": 579.0, + "completions/min_terminated_length": 579.0, + "epoch": 0.5583895973993498, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.835606787385734, + "kl": 0.019805908203125, + "learning_rate": 5.372354460310267e-07, + "loss": -0.0516, + "num_tokens": 100987558.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0562653541564941, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04722017451292761, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1108622297241172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1377.3125, + "completions/mean_terminated_length": 1359.7857666015625, + "completions/min_length": 1213.0, + "completions/min_terminated_length": 1213.0, + "epoch": 0.5586396599149788, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.820251323715121, + "kl": 0.0123748779296875, + "learning_rate": 5.36842800809693e-07, + "loss": 0.0156, + "num_tokens": 101037155.0, + "reward": 0.0, + "reward_std": 1.0285301208496094, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012202856950244255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10772722158837869, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.048686449556014796, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1000.3125, + "completions/mean_terminated_length": 1000.3125, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.5588897224306076, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.877886881725343, + "kl": 0.00958251953125, + "learning_rate": 5.364501656137079e-07, + "loss": -0.0213, + "num_tokens": 101072216.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9692012071609497, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04661428236258012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051582213310720756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1265.4375, + "completions/mean_terminated_length": 1124.7000732421875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.5591397849462365, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.663425561983497, + "kl": 0.0144195556640625, + "learning_rate": 5.360575407422458e-07, + "loss": 0.0001, + "num_tokens": 101126911.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5653176307678223, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009719044535119185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18883835636164328, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1285.75, + "completions/mean_terminated_length": 1271.4666748046875, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.5593898474618655, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.034515186618796, + "kl": 0.018280029296875, + "learning_rate": 5.356649264944745e-07, + "loss": -0.0497, + "num_tokens": 101169387.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0209271907806396, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04029135801672779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06432641946921251, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1253.875, + "completions/mean_terminated_length": 1106.2000732421875, + "completions/min_length": 943.0, + "completions/min_terminated_length": 943.0, + "epoch": 0.5596399099774944, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.10042523173841, + "kl": 0.0186004638671875, + "learning_rate": 5.352723231695528e-07, + "loss": 0.0231, + "num_tokens": 101220665.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0629804134368896, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03713990307770006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1416421832755986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1168.5625, + "completions/mean_terminated_length": 1017.9091186523438, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.5598899724931233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.826967558672326, + "kl": 0.01556396484375, + "learning_rate": 5.34879731066631e-07, + "loss": 0.0154, + "num_tokens": 101270042.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9782330989837646, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08331795836641968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13715352704359515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1044.0625, + "completions/mean_terminated_length": 1044.0625, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.5601400350087522, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.538683852489407, + "kl": 0.020782470703125, + "learning_rate": 5.344871504848514e-07, + "loss": -0.1049, + "num_tokens": 101313083.0, + "reward": 0.0, + "reward_std": 0.5862321853637695, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026311492509454977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03709011008688107, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797316, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1108.875, + "completions/mean_terminated_length": 1108.875, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.5603900975243811, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.875320641455022, + "kl": 0.010772705078125, + "learning_rate": 5.340945817233475e-07, + "loss": -0.0806, + "num_tokens": 101364217.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0591137409210205, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0962609936797834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10739653521284506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 1151.0625, + "completions/mean_terminated_length": 992.45458984375, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.56064016004001, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1985395673093913, + "kl": 0.0168304443359375, + "learning_rate": 5.337020250812436e-07, + "loss": 0.0083, + "num_tokens": 101408346.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9044009447097778, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015469841108600668, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04142332161443495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 790.75, + "completions/mean_terminated_length": 790.75, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.5608902225556389, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7423204980808906, + "kl": 0.01214599609375, + "learning_rate": 5.333094808576548e-07, + "loss": 0.0059, + "num_tokens": 101445710.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0132734775543213, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04849289827193898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10575239460173647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1223.0, + "completions/max_terminated_length": 1223.0, + "completions/mean_length": 997.9375, + "completions/mean_terminated_length": 997.9375, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.5611402850712678, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.539904998516066, + "kl": 0.02056884765625, + "learning_rate": 5.329169493516869e-07, + "loss": -0.0567, + "num_tokens": 101479789.0, + "reward": 0.0, + "reward_std": 0.9596482515335083, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06276082171417702, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06431266024915261, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186213, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1101.375, + "completions/mean_terminated_length": 1009.3846435546875, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.5613903475868968, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1650999324252522, + "kl": 0.02197265625, + "learning_rate": 5.325244308624356e-07, + "loss": -0.0858, + "num_tokens": 101524179.0, + "reward": 0.0, + "reward_std": 0.7156879901885986, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.6890439832193369, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11445251702764406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1044.875, + "completions/mean_terminated_length": 1044.875, + "completions/min_length": 665.0, + "completions/min_terminated_length": 665.0, + "epoch": 0.5616404101025256, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4557828431307023, + "kl": 0.020294189453125, + "learning_rate": 5.32131925688987e-07, + "loss": -0.0428, + "num_tokens": 101570641.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8416419625282288, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01477801907403989, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06610644981104544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1330.125, + "completions/mean_terminated_length": 1160.25, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.5618904726181545, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1941930877988285, + "kl": 0.01947021484375, + "learning_rate": 5.31739434130417e-07, + "loss": -0.0127, + "num_tokens": 101618867.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9077098369598389, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08386631889722515, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20143712860900512, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1410.9375, + "completions/mean_terminated_length": 1296.4285888671875, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.5621405351337835, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7457783397963915, + "kl": 0.0156707763671875, + "learning_rate": 5.313469564857912e-07, + "loss": 0.003, + "num_tokens": 101673498.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9426655769348145, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02931478977209338, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049642616514817425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000305, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1091.1875, + "completions/mean_terminated_length": 1091.1875, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.5623905976494123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.315170174100997, + "kl": 0.01483154296875, + "learning_rate": 5.309544930541645e-07, + "loss": 0.0208, + "num_tokens": 101711853.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9804953336715698, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09324953694726329, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17239520499578864, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1188.0, + "completions/max_terminated_length": 1188.0, + "completions/mean_length": 800.875, + "completions/mean_terminated_length": 800.875, + "completions/min_length": 536.0, + "completions/min_terminated_length": 536.0, + "epoch": 0.5626406601650412, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8716916191296913, + "kl": 0.007598876953125, + "learning_rate": 5.305620441345809e-07, + "loss": -0.0169, + "num_tokens": 101762387.0, + "reward": 0.0, + "reward_std": 0.9748215079307556, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038000036723214614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09110612436417702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1418.0, + "completions/mean_terminated_length": 1281.3333740234375, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.5628907226806702, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9933181734639938, + "kl": 0.012359619140625, + "learning_rate": 5.301696100260736e-07, + "loss": -0.0397, + "num_tokens": 101819651.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8184797167778015, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030209113445169602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03996622920148869, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.03442651863295484, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1237.625, + "completions/mean_terminated_length": 1118.3636474609375, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.5631407851962991, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5941714671944824, + "kl": 0.0148773193359375, + "learning_rate": 5.297771910276643e-07, + "loss": -0.029, + "num_tokens": 101866549.0, + "reward": 0.0, + "reward_std": 0.6771284341812134, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16575807365296222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19676295892347093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1389.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1119.9375, + "completions/mean_terminated_length": 1119.9375, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.5633908477119279, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.360489651081483, + "kl": 0.019195556640625, + "learning_rate": 5.293847874383632e-07, + "loss": 0.0334, + "num_tokens": 101910836.0, + "reward": 0.0, + "reward_std": 0.6538268327713013, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10150192373854997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2510507659817192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1257.4375, + "completions/mean_terminated_length": 1176.5833740234375, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.5636409102275569, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3271645989747625, + "kl": 0.0160675048828125, + "learning_rate": 5.289923995571684e-07, + "loss": -0.075, + "num_tokens": 101956691.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0048019886016846, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.059502641779286956, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09920918754932435, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1311.75, + "completions/mean_terminated_length": 1198.800048828125, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.5638909727431858, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4139520566859067, + "kl": 0.0196685791015625, + "learning_rate": 5.286000276830671e-07, + "loss": 0.0634, + "num_tokens": 102007575.0, + "reward": 0.0, + "reward_std": 0.9891918897628784, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10566129234385088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11878694161005203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 994.5, + "completions/mean_terminated_length": 922.2857666015625, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.5641410352588146, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.162061983614614, + "kl": 0.011199951171875, + "learning_rate": 5.282076721150334e-07, + "loss": 0.0577, + "num_tokens": 102061039.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.995070219039917, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.122555902790316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15350611477536727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.2180298994106933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1247.3125, + "completions/mean_terminated_length": 1230.4666748046875, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.5643910977744436, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0374271692119583, + "kl": 0.015228271484375, + "learning_rate": 5.278153331520287e-07, + "loss": 0.039, + "num_tokens": 102106916.0, + "reward": 0.0, + "reward_std": 0.6283268928527832, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03412099907539941, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14013662506781635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1152.5625, + "completions/mean_terminated_length": 1072.3846435546875, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.5646411602900725, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.165426913688285, + "kl": 0.0144805908203125, + "learning_rate": 5.274230110930031e-07, + "loss": 0.0275, + "num_tokens": 102157629.0, + "reward": 0.0, + "reward_std": 0.8862087726593018, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09903956426099383, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28824390983500403, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15962919996504865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 869.0625, + "completions/mean_terminated_length": 869.0625, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.5648912228057015, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.402047155625281, + "kl": 0.016510009765625, + "learning_rate": 5.270307062368923e-07, + "loss": -0.0346, + "num_tokens": 102182054.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9045581817626953, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04060173972364841, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04864708453502001, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1319.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 1003.4375, + "completions/mean_terminated_length": 1003.4375, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.5651412853213303, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7918160882744285, + "kl": 0.018341064453125, + "learning_rate": 5.2663841888262e-07, + "loss": -0.0171, + "num_tokens": 102219933.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8198275566101074, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04556904149853948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10187831043294657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1175.5625, + "completions/mean_terminated_length": 1067.416748046875, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.5653913478369592, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4096411399621656, + "kl": 0.0171661376953125, + "learning_rate": 5.262461493290955e-07, + "loss": -0.0058, + "num_tokens": 102263870.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9289268255233765, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0798319464865321, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17915545653374845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1153.75, + "completions/mean_terminated_length": 1130.666748046875, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.5656414103525882, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2532558711810053, + "kl": 0.020263671875, + "learning_rate": 5.258538978752158e-07, + "loss": -0.02, + "num_tokens": 102303234.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0620956420898438, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023949664554753836, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11699025176942096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1149.0, + "completions/max_terminated_length": 1149.0, + "completions/mean_length": 785.5, + "completions/mean_terminated_length": 785.5, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.5658914728682171, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.551188591011897, + "kl": 0.0179901123046875, + "learning_rate": 5.254616648198634e-07, + "loss": 0.0292, + "num_tokens": 102338978.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0536489486694336, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0030501142538874953, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025052849879367435, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1173.4375, + "completions/mean_terminated_length": 1064.5833740234375, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.5661415353838459, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3285663159326457, + "kl": 0.0229949951171875, + "learning_rate": 5.250694504619063e-07, + "loss": 0.0002, + "num_tokens": 102386057.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9754273295402527, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026238292828932704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04397541097387511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1428.0, + "completions/mean_terminated_length": 1335.4285888671875, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.5663915978994749, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7046356296187355, + "kl": 0.0110015869140625, + "learning_rate": 5.246772551001995e-07, + "loss": -0.014, + "num_tokens": 102435697.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8682008981704712, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09463891412140629, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1514781921150718, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055634, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1211.5625, + "completions/mean_terminated_length": 1211.5625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.5666416604151038, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4803072271046607, + "kl": 0.0096588134765625, + "learning_rate": 5.242850790335823e-07, + "loss": 0.017, + "num_tokens": 102481890.0, + "reward": 0.0, + "reward_std": 0.867551326751709, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005197979858959208, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04378926592742114, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14751020052613062, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1360.875, + "completions/mean_terminated_length": 1328.769287109375, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.5668917229307326, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.662209165942265, + "kl": 0.011199951171875, + "learning_rate": 5.238929225608803e-07, + "loss": -0.0324, + "num_tokens": 102529016.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9678479433059692, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06032300221939885, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061089980120641665, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445937, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1011.625, + "completions/mean_terminated_length": 718.6000366210938, + "completions/min_length": 474.0, + "completions/min_terminated_length": 474.0, + "epoch": 0.5671417854463616, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.991912360316003, + "kl": 0.01204681396484375, + "learning_rate": 5.235007859809032e-07, + "loss": 0.0793, + "num_tokens": 102561530.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.043903112411499, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04668381068027395, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07339654538329266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15752718754175363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1293.375, + "completions/mean_terminated_length": 1199.45458984375, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.5673918479619905, + "frac_reward_zero_std": 0.0, + "grad_norm": 5.134533725037948, + "kl": 0.125762939453125, + "learning_rate": 5.231086695924464e-07, + "loss": -0.0118, + "num_tokens": 102616256.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9239065647125244, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.051205870767813716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14268069142819703, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1067.6875, + "completions/mean_terminated_length": 1067.6875, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.5676419104776194, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4951790954316073, + "kl": 0.01495361328125, + "learning_rate": 5.227165736942894e-07, + "loss": -0.0069, + "num_tokens": 102656379.0, + "reward": 0.0, + "reward_std": 0.5377345681190491, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10445769673207637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15514928557813748, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1163.9375, + "completions/mean_terminated_length": 1011.1818237304688, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.5678919729932483, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7856103738509774, + "kl": 0.01336669921875, + "learning_rate": 5.223244985851961e-07, + "loss": -0.0372, + "num_tokens": 102698786.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9828059673309326, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10458217773942607, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09452895702236234, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902597, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1425.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1169.1875, + "completions/mean_terminated_length": 1169.1875, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.5681420355088772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1277380934066863, + "kl": 0.017669677734375, + "learning_rate": 5.21932444563915e-07, + "loss": 0.0174, + "num_tokens": 102744309.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6584100723266602, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.051832464614403675, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0865418479557941, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 990.625, + "completions/mean_terminated_length": 990.625, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.5683920980245061, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9666957487835268, + "kl": 0.01194000244140625, + "learning_rate": 5.215404119291784e-07, + "loss": 0.0473, + "num_tokens": 102772551.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9570653438568115, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023385152751001073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09094514318004775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1175.1875, + "completions/mean_terminated_length": 1153.533447265625, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.568642160540135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.070015421388485, + "kl": 0.0167083740234375, + "learning_rate": 5.211484009797015e-07, + "loss": 0.0141, + "num_tokens": 102816626.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.044004201889038, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004949589495728125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03184274342163422, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1191.625, + "completions/mean_terminated_length": 1191.625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.5688922230557639, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.287279817538231, + "kl": 0.014739990234375, + "learning_rate": 5.207564120141839e-07, + "loss": -0.0087, + "num_tokens": 102866596.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6383461952209473, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12175443979285512, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0867477731483357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1170.25, + "completions/mean_terminated_length": 1123.1429443359375, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.5691422855713929, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.798017362331581, + "kl": 0.013031005859375, + "learning_rate": 5.203644453313084e-07, + "loss": -0.0121, + "num_tokens": 102909408.0, + "reward": 0.0, + "reward_std": 0.9500021934509277, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027720412618506073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041514722359740804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1179.8125, + "completions/mean_terminated_length": 1073.0833740234375, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.5693923480870218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3395280130517118, + "kl": 0.01641845703125, + "learning_rate": 5.199725012297402e-07, + "loss": 0.008, + "num_tokens": 102953965.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8145663142204285, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03775260205550193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09674526442519897, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1224.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 1042.875, + "completions/mean_terminated_length": 1042.875, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.5696424106026506, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.997733005047871, + "kl": 0.013824462890625, + "learning_rate": 5.195805800081278e-07, + "loss": 0.0021, + "num_tokens": 102985699.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0609701871871948, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020432811953444412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03250151908774789, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0843274042711568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1320.75, + "completions/mean_terminated_length": 1295.1429443359375, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.5698924731182796, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6287358155879357, + "kl": 0.0098724365234375, + "learning_rate": 5.191886819651025e-07, + "loss": 0.0224, + "num_tokens": 103038671.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0317494869232178, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05418550162838451, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0741800903851783, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1293.5, + "completions/mean_terminated_length": 1199.6363525390625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.5701425356339085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.964956564716972, + "kl": 0.016693115234375, + "learning_rate": 5.187968073992772e-07, + "loss": 0.0383, + "num_tokens": 103078695.0, + "reward": 0.0, + "reward_std": 0.8474757671356201, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03171663617418975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07152842361979823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1348.5625, + "completions/mean_terminated_length": 1197.125, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.5703925981495374, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.91998151066254, + "kl": 0.017791748046875, + "learning_rate": 5.184049566092474e-07, + "loss": -0.0025, + "num_tokens": 103136784.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0586646795272827, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07752891555346304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13358655819484133, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408154, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1207.9375, + "completions/mean_terminated_length": 1166.21435546875, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.5706426606651663, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9544054907936137, + "kl": 0.015838623046875, + "learning_rate": 5.180131298935902e-07, + "loss": -0.0269, + "num_tokens": 103174975.0, + "reward": 0.0, + "reward_std": 0.7632911205291748, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08181987811485145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19981848972377658, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1377060745318193, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1207.5, + "completions/mean_terminated_length": 1188.0001220703125, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.5708927231807952, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8323864006608312, + "kl": 0.027130126953125, + "learning_rate": 5.176213275508647e-07, + "loss": -0.002, + "num_tokens": 103215487.0, + "reward": 0.0, + "reward_std": 0.5113885998725891, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.048710855597008544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07642842033918416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1408.0, + "completions/mean_terminated_length": 1316.0, + "completions/min_length": 1089.0, + "completions/min_terminated_length": 1089.0, + "epoch": 0.5711427856964241, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.446269268326727, + "kl": 0.01776123046875, + "learning_rate": 5.172295498796115e-07, + "loss": 0.0032, + "num_tokens": 103271759.0, + "reward": 0.0, + "reward_std": 0.9824569225311279, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023129024811724137, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046279882446526636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1186.5, + "completions/mean_terminated_length": 1165.60009765625, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.571392848212053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2429904456714658, + "kl": 0.016265869140625, + "learning_rate": 5.168377971783516e-07, + "loss": -0.0741, + "num_tokens": 103312303.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9949912428855896, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019374082868693347, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07450614452466797, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1265.75, + "completions/mean_terminated_length": 1232.2857666015625, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.5716429107276819, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.695644416794078, + "kl": 0.017364501953125, + "learning_rate": 5.164460697455879e-07, + "loss": -0.0049, + "num_tokens": 103364211.0, + "reward": 0.0, + "reward_std": 0.6433941125869751, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06757091527212487, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13743507389855616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787749, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1052.0, + "completions/mean_terminated_length": 1052.0, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.5718929732433108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7046773236625468, + "kl": 0.0104827880859375, + "learning_rate": 5.160543678798038e-07, + "loss": -0.0534, + "num_tokens": 103407419.0, + "reward": 0.0, + "reward_std": 0.76474928855896, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09612164734239444, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10661260184032867, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.056927504255331135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1276.5625, + "completions/mean_terminated_length": 1142.5, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.5721430357589398, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0994408557714213, + "kl": 0.014404296875, + "learning_rate": 5.156626918794628e-07, + "loss": 0.0005, + "num_tokens": 103456684.0, + "reward": 0.0, + "reward_std": 0.9802349805831909, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03374633736221917, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08113840066060099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1308.375, + "completions/mean_terminated_length": 1193.4000244140625, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.5723930982745686, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0411988351004795, + "kl": 0.0140228271484375, + "learning_rate": 5.152710420430091e-07, + "loss": 0.0438, + "num_tokens": 103504554.0, + "reward": 0.0, + "reward_std": 0.973111629486084, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04426464428518089, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06682593773687204, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 873.0625, + "completions/mean_terminated_length": 873.0625, + "completions/min_length": 594.0, + "completions/min_terminated_length": 594.0, + "epoch": 0.5726431607901975, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.057134763261328, + "kl": 0.0163726806640625, + "learning_rate": 5.14879418668867e-07, + "loss": 0.0153, + "num_tokens": 103553427.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6353734731674194, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.125052976248563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18879379478564023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1288.25, + "completions/mean_terminated_length": 1258.0, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.5728932233058265, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0366683844903744, + "kl": 0.0139923095703125, + "learning_rate": 5.144878220554404e-07, + "loss": -0.0335, + "num_tokens": 103601295.0, + "reward": 0.0, + "reward_std": 0.4819943308830261, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12587449338112952, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19916620763701326, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1366.1875, + "completions/mean_terminated_length": 1194.1429443359375, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.5731432858214554, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5252863203872895, + "kl": 0.0140838623046875, + "learning_rate": 5.140962525011126e-07, + "loss": 0.0115, + "num_tokens": 103657138.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0653834342956543, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010526880854376313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050183188630236554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1255.9375, + "completions/mean_terminated_length": 1239.666748046875, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.5733933483370842, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.279724268244194, + "kl": 0.01036834716796875, + "learning_rate": 5.13704710304247e-07, + "loss": -0.0631, + "num_tokens": 103708553.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0610902309417725, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026882299611282603, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047550325552072725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1280.25, + "completions/mean_terminated_length": 1109.3333740234375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.5736434108527132, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6892578377781704, + "kl": 0.0135345458984375, + "learning_rate": 5.133131957631857e-07, + "loss": -0.0042, + "num_tokens": 103755221.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0610355138778687, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010374819678686684, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04583261883163071, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459201, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1209.25, + "completions/mean_terminated_length": 1142.1539306640625, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.5738934733683421, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0892798181918715, + "kl": 0.0179901123046875, + "learning_rate": 5.129217091762496e-07, + "loss": -0.0598, + "num_tokens": 103805473.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0379133224487305, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024180278079784887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07838842601676105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1108.1875, + "completions/mean_terminated_length": 1108.1875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.574143535883971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.324454292918934, + "kl": 0.0159912109375, + "learning_rate": 5.125302508417384e-07, + "loss": 0.0041, + "num_tokens": 103861676.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5368005037307739, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05839738284899765, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16410637637787132, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466156, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1110.125, + "completions/mean_terminated_length": 1054.4285888671875, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.5743935983995999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.367112375864426, + "kl": 0.0197601318359375, + "learning_rate": 5.121388210579305e-07, + "loss": -0.0679, + "num_tokens": 103904550.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7586082816123962, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030199245653825094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1957231781761579, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1140987226857449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1225.875, + "completions/mean_terminated_length": 1207.60009765625, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.5746436609152288, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.622163769395901, + "kl": 0.0159912109375, + "learning_rate": 5.117474201230824e-07, + "loss": -0.0191, + "num_tokens": 103953716.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0495389699935913, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03192512281727409, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04879614941785051, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1053.0, + "completions/mean_terminated_length": 1023.2000732421875, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.5748937234308578, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.479681015941143, + "kl": 0.021484375, + "learning_rate": 5.113560483354284e-07, + "loss": -0.0554, + "num_tokens": 103987108.0, + "reward": 1.862645149230957e-09, + "reward_std": 0.940168023109436, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1212.625, + "completions/mean_terminated_length": 1212.625, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.5751437859464866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1430590999483052, + "kl": 0.01544189453125, + "learning_rate": 5.109647059931812e-07, + "loss": 0.006, + "num_tokens": 104035270.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0498970746994019, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.25360872886550334, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2446843415445637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1181.5, + "completions/mean_terminated_length": 1108.0, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.5753938484621155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.795473028511627, + "kl": 0.0140228271484375, + "learning_rate": 5.105733933945302e-07, + "loss": -0.0753, + "num_tokens": 104081222.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9509678483009338, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03519810157964835, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05285735770232731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1146.8125, + "completions/mean_terminated_length": 1123.2667236328125, + "completions/min_length": 974.0, + "completions/min_terminated_length": 974.0, + "epoch": 0.5756439109777445, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.511384521942019, + "kl": 0.0216064453125, + "learning_rate": 5.101821108376429e-07, + "loss": -0.0485, + "num_tokens": 104134339.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9732698202133179, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18565960544572616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12272850130993569, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1120.4375, + "completions/mean_terminated_length": 1095.1334228515625, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.5758939734933733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.425569168768857, + "kl": 0.019622802734375, + "learning_rate": 5.097908586206632e-07, + "loss": -0.0718, + "num_tokens": 104180082.0, + "reward": 0.0, + "reward_std": 0.8185616731643677, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0390544772535094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0798775138460345, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1200.0, + "completions/mean_terminated_length": 1157.1429443359375, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.5761440360090022, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8470023249026624, + "kl": 0.01507568359375, + "learning_rate": 5.09399637041713e-07, + "loss": 0.0085, + "num_tokens": 104233250.0, + "reward": 0.0, + "reward_std": 0.9341722726821899, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03617654291569874, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07954376681040111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1095.8125, + "completions/mean_terminated_length": 1068.86669921875, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.5763940985246312, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3537682208028614, + "kl": 0.018280029296875, + "learning_rate": 5.090084463988894e-07, + "loss": -0.0199, + "num_tokens": 104274575.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9561659097671509, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012816484279121411, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04855012037530993, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1039.75, + "completions/mean_terminated_length": 1009.0667114257812, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.5766441610402601, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9952727536836417, + "kl": 0.01058197021484375, + "learning_rate": 5.086172869902671e-07, + "loss": -0.0287, + "num_tokens": 104325595.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8539559841156006, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1704630068207695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07899746258206725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1167.3125, + "completions/mean_terminated_length": 1167.3125, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.5768942235558889, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0195940755660344, + "kl": 0.016876220703125, + "learning_rate": 5.082261591138964e-07, + "loss": 0.0074, + "num_tokens": 104359728.0, + "reward": 1.862645149230957e-08, + "reward_std": 0.9818234443664551, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.060889306467360196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.105062218705018, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1389.8125, + "completions/mean_terminated_length": 1206.166748046875, + "completions/min_length": 1078.0, + "completions/min_terminated_length": 1078.0, + "epoch": 0.5771442860715179, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6649174812655354, + "kl": 0.0119171142578125, + "learning_rate": 5.078350630678038e-07, + "loss": 0.0372, + "num_tokens": 104416197.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0019190311431885, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02107828794111348, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03252213782920542, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1204.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1021.5625, + "completions/mean_terminated_length": 1021.5625, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.5773943485871468, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.718485538102207, + "kl": 0.016571044921875, + "learning_rate": 5.074439991499914e-07, + "loss": -0.0217, + "num_tokens": 104451790.0, + "reward": 0.0, + "reward_std": 0.9291521310806274, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034773201999751625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10264423981803664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12464765155042849, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 998.375, + "completions/mean_terminated_length": 998.375, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.5776444111027756, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4616951261636713, + "kl": 0.0148773193359375, + "learning_rate": 5.070529676584369e-07, + "loss": -0.0228, + "num_tokens": 104484004.0, + "reward": 0.0, + "reward_std": 0.8387163877487183, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08927109054185563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07622623549617832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1416.75, + "completions/mean_terminated_length": 1167.0, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.5778944736184046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5512401348389466, + "kl": 0.0103912353515625, + "learning_rate": 5.066619688910934e-07, + "loss": 0.0063, + "num_tokens": 104543192.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8688246011734009, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05288384518452093, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07848944547234524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1189.875, + "completions/mean_terminated_length": 1118.3077392578125, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.5781445361340335, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0584256424232277, + "kl": 0.01837158203125, + "learning_rate": 5.062710031458891e-07, + "loss": 0.0193, + "num_tokens": 104582366.0, + "reward": 0.0, + "reward_std": 0.924498438835144, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1450836807767055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20145628175559227, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1344.3125, + "completions/mean_terminated_length": 1273.5455322265625, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.5783945986496624, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8662202939830683, + "kl": 0.020355224609375, + "learning_rate": 5.058800707207265e-07, + "loss": -0.0083, + "num_tokens": 104644067.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9693636894226074, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02065141161662314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06902186832519677, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1080.8125, + "completions/mean_terminated_length": 1080.8125, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.5786446611652913, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.322428168530861, + "kl": 0.014434814453125, + "learning_rate": 5.054891719134835e-07, + "loss": 0.0215, + "num_tokens": 104680552.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0097395181655884, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0029953544593396934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025905185560683518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1207.5625, + "completions/mean_terminated_length": 1140.0770263671875, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.5788947236809202, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.418899791311373, + "kl": 0.009613037109375, + "learning_rate": 5.050983070220119e-07, + "loss": -0.0055, + "num_tokens": 104723977.0, + "reward": 0.0, + "reward_std": 0.8352043628692627, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009413261921574014, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047485291319477305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1321.5625, + "completions/mean_terminated_length": 1214.5, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.5791447861965492, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6840377711480743, + "kl": 0.0118865966796875, + "learning_rate": 5.047074763441374e-07, + "loss": 0.0349, + "num_tokens": 104772186.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7862033843994141, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07520682602406713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07205604681192407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1360.125, + "completions/mean_terminated_length": 1296.5455322265625, + "completions/min_length": 1052.0, + "completions/min_terminated_length": 1052.0, + "epoch": 0.5793948487121781, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.669808322727827, + "kl": 0.023468017578125, + "learning_rate": 5.043166801776605e-07, + "loss": 0.0572, + "num_tokens": 104837356.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0452367067337036, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05100119669506083, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07702398516889292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1372.5, + "completions/mean_terminated_length": 1273.3333740234375, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.5796449112278069, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.032329717078327, + "kl": 0.0233154296875, + "learning_rate": 5.039259188203546e-07, + "loss": -0.0145, + "num_tokens": 104889252.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9315700531005859, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05345970765007264, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07868895693370526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 939.5, + "completions/mean_terminated_length": 939.5, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.5798949737434359, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1991061917891064, + "kl": 0.0151519775390625, + "learning_rate": 5.03535192569967e-07, + "loss": -0.0027, + "num_tokens": 104934900.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9829269647598267, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06387282066943338, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09522799802657679, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1096.9375, + "completions/mean_terminated_length": 1003.923095703125, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.5801450362590648, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3232708716068076, + "kl": 0.0140838623046875, + "learning_rate": 5.031445017242179e-07, + "loss": 0.0126, + "num_tokens": 104981027.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0038211345672607, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024754677805105298, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07158343765584946, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1283.375, + "completions/mean_terminated_length": 1153.4000244140625, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.5803950987746936, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.400783966338096, + "kl": 0.011627197265625, + "learning_rate": 5.02753846580801e-07, + "loss": -0.0256, + "num_tokens": 105028257.0, + "reward": 0.0, + "reward_std": 0.7928069233894348, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04876183337207114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06441144120293646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382571, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1193.3125, + "completions/mean_terminated_length": 1053.9091796875, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.5806451612903226, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.212652948433277, + "kl": 0.0162353515625, + "learning_rate": 5.023632274373824e-07, + "loss": -0.0702, + "num_tokens": 105067302.0, + "reward": 0.0, + "reward_std": 0.7896625399589539, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008156474033046658, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08817620898448639, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1104.5, + "completions/mean_terminated_length": 1078.1334228515625, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.5808952238059515, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8046019644481515, + "kl": 0.0161285400390625, + "learning_rate": 5.019726445916008e-07, + "loss": 0.007, + "num_tokens": 105112534.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0576317310333252, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07043596379796724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21934080738275372, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1201.0, + "completions/max_terminated_length": 1201.0, + "completions/mean_length": 992.5625, + "completions/mean_terminated_length": 992.5625, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.5811452863215804, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.801137389995506, + "kl": 0.021728515625, + "learning_rate": 5.015820983410675e-07, + "loss": -0.0124, + "num_tokens": 105153335.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0543859004974365, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003960148904984262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024594600468338338, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1375.75, + "completions/mean_terminated_length": 1301.2000732421875, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.5813953488372093, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.97067658331425, + "kl": 0.0155487060546875, + "learning_rate": 5.011915889833658e-07, + "loss": 0.0201, + "num_tokens": 105209411.0, + "reward": 0.0, + "reward_std": 1.045926809310913, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0016035132600793144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056144212715473686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17191729277636836, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1104.5, + "completions/mean_terminated_length": 1104.5, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.5816454113528382, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.058674953616276, + "kl": 0.010284423828125, + "learning_rate": 5.008011168160508e-07, + "loss": -0.0202, + "num_tokens": 105242155.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8234265446662903, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007206368740159598, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06904297664354865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928165, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 977.5, + "completions/mean_terminated_length": 977.5, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.5818954738684671, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1527148397636435, + "kl": 0.0155792236328125, + "learning_rate": 5.004106821366491e-07, + "loss": -0.03, + "num_tokens": 105279835.0, + "reward": 0.0, + "reward_std": 0.37447381019592285, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08661794197188188, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.46011530737990775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7999999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1113.5625, + "completions/mean_terminated_length": 1113.5625, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.582145536384096, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4923942867476976, + "kl": 0.023193359375, + "learning_rate": 5.000202852426591e-07, + "loss": 0.0102, + "num_tokens": 105313724.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9652321934700012, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.048581979600340955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1447275518224536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 1.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 0.0, + "completions/mean_length": 1500.0, + "completions/mean_terminated_length": 0.0, + "completions/min_length": 1500.0, + "completions/min_terminated_length": 0.0, + "epoch": 0.5823955988997249, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.303646448851639, + "kl": 0.013092041015625, + "learning_rate": 4.996299264315506e-07, + "loss": 0.0005, + "num_tokens": 105381068.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0207549333572388, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18658737136307346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15868064155261286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1063.9375, + "completions/mean_terminated_length": 1063.9375, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.5826456614153538, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7468035886762534, + "kl": 0.012732505798339844, + "learning_rate": 4.992396060007635e-07, + "loss": 0.0339, + "num_tokens": 105423019.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0307469367980957, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04637849474664679, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10977957772603705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 1319.0, + "completions/mean_terminated_length": 1138.0, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.5828957239309828, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.262652637904417, + "kl": 0.013397216796875, + "learning_rate": 4.98849324247709e-07, + "loss": 0.0158, + "num_tokens": 105475947.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9861156940460205, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0012342430502999373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08180886044233886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1222.875, + "completions/mean_terminated_length": 1183.2857666015625, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.5831457864466116, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2179025722336156, + "kl": 0.01763916015625, + "learning_rate": 4.984590814697694e-07, + "loss": -0.0368, + "num_tokens": 105531665.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0260601043701172, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06012513251481568, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06300779297583965, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14291929864761418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1439.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1009.4375, + "completions/mean_terminated_length": 1009.4375, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.5833958489622406, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6309836009533503, + "kl": 0.020233154296875, + "learning_rate": 4.980688779642963e-07, + "loss": -0.0239, + "num_tokens": 105565912.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.011700987815857, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003759440775125778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02192667617225081, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1264.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1021.5625, + "completions/mean_terminated_length": 1021.5625, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.5836459114778695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0661153881793295, + "kl": 0.0139312744140625, + "learning_rate": 4.976787140286116e-07, + "loss": -0.0237, + "num_tokens": 105602745.0, + "reward": 0.0, + "reward_std": 1.0013879537582397, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01252735961582118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11053366547730044, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1254.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 1090.125, + "completions/mean_terminated_length": 1090.125, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.5838959739934984, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7509396148952274, + "kl": 0.031097412109375, + "learning_rate": 4.972885899600078e-07, + "loss": -0.0198, + "num_tokens": 105651667.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0514953136444092, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029266545257416832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08740169224888983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1100.0, + "completions/max_terminated_length": 1100.0, + "completions/mean_length": 834.1875, + "completions/mean_terminated_length": 834.1875, + "completions/min_length": 595.0, + "completions/min_terminated_length": 595.0, + "epoch": 0.5841460365091273, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.183117561528536, + "kl": 0.017364501953125, + "learning_rate": 4.96898506055746e-07, + "loss": -0.008, + "num_tokens": 105697670.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8326643705368042, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003063558878667985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05682051500067662, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1249.0625, + "completions/mean_terminated_length": 1213.21435546875, + "completions/min_length": 1084.0, + "completions/min_terminated_length": 1084.0, + "epoch": 0.5843960990247562, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0971474360840627, + "kl": 0.01654052734375, + "learning_rate": 4.965084626130574e-07, + "loss": -0.0107, + "num_tokens": 105752175.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0601084232330322, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0036728631416773263, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05412711934254774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752094, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1165.8125, + "completions/mean_terminated_length": 1165.8125, + "completions/min_length": 1002.0, + "completions/min_terminated_length": 1002.0, + "epoch": 0.5846461615403851, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.24101348380296, + "kl": 0.0196533203125, + "learning_rate": 4.96118459929142e-07, + "loss": -0.0293, + "num_tokens": 105798028.0, + "reward": 0.0, + "reward_std": 0.8512970209121704, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04614533580084446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048012866398684975, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1243.875, + "completions/mean_terminated_length": 987.75, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.584896224056014, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.558162492178964, + "kl": 0.010101318359375, + "learning_rate": 4.95728498301169e-07, + "loss": -0.0091, + "num_tokens": 105851762.0, + "reward": 0.0, + "reward_std": 0.655755341053009, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19226572980397147, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11810194124085639, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1159.9375, + "completions/mean_terminated_length": 1081.4615478515625, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.5851462865716429, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7532520920147974, + "kl": 0.012664794921875, + "learning_rate": 4.953385780262762e-07, + "loss": 0.0313, + "num_tokens": 105893849.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9899556636810303, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07399043183692194, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07363565653200678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1037.0625, + "completions/mean_terminated_length": 1037.0625, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.5853963490872718, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.439979745033483, + "kl": 0.0147857666015625, + "learning_rate": 4.949486994015698e-07, + "loss": 0.0156, + "num_tokens": 105929146.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.48622292280197144, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08525436072876776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06932472706717835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1116.0, + "completions/max_terminated_length": 1116.0, + "completions/mean_length": 1001.625, + "completions/mean_terminated_length": 1001.625, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.5856464116029008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1481872267092723, + "kl": 0.0110931396484375, + "learning_rate": 4.945588627241244e-07, + "loss": -0.0378, + "num_tokens": 105958852.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0027930736541748, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14950535726806533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1135.5625, + "completions/mean_terminated_length": 1135.5625, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.5858964741185296, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.421076649969931, + "kl": 0.0093231201171875, + "learning_rate": 4.941690682909826e-07, + "loss": 0.0008, + "num_tokens": 105995085.0, + "reward": 0.0, + "reward_std": 0.7803366780281067, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07763483006246244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16683255954424375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1000.875, + "completions/mean_terminated_length": 967.6000366210938, + "completions/min_length": 546.0, + "completions/min_terminated_length": 546.0, + "epoch": 0.5861465366341585, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4023027951273006, + "kl": 0.015411376953125, + "learning_rate": 4.937793163991549e-07, + "loss": -0.0581, + "num_tokens": 106036995.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0651626586914062, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12855758542021398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09630845535346613, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1156.1875, + "completions/mean_terminated_length": 1107.071533203125, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.5863965991497875, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.636472039062007, + "kl": 0.0141448974609375, + "learning_rate": 4.933896073456189e-07, + "loss": -0.0039, + "num_tokens": 106087518.0, + "reward": 0.0, + "reward_std": 0.8858144283294678, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.051283084274380525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18911113971204443, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362772, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1367.125, + "completions/mean_terminated_length": 1263.77783203125, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.5866466616654163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.932565707346272, + "kl": 0.019683837890625, + "learning_rate": 4.929999414273204e-07, + "loss": -0.0321, + "num_tokens": 106136256.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6809786558151245, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09129670528395444, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09841411804597702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1267.125, + "completions/mean_terminated_length": 1034.25, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.5868967241810452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.309856623760675, + "kl": 0.0154266357421875, + "learning_rate": 4.926103189411718e-07, + "loss": -0.0248, + "num_tokens": 106193098.0, + "reward": 0.0, + "reward_std": 1.0008400678634644, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1942744201674646, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24689157654808414, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1307.8125, + "completions/mean_terminated_length": 1280.357177734375, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.5871467866966742, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.95738471694819, + "kl": 0.0173492431640625, + "learning_rate": 4.92220740184052e-07, + "loss": -0.0173, + "num_tokens": 106234919.0, + "reward": 0.0, + "reward_std": 0.9505593180656433, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016862511107243856, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03375932019019097, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852977, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1088.0625, + "completions/mean_terminated_length": 1088.0625, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.5873968492123031, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.502598233201184, + "kl": 0.016448974609375, + "learning_rate": 4.918312054528076e-07, + "loss": -0.0484, + "num_tokens": 106269048.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6360487341880798, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16619649780590412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24556857332389873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16459827639617797, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1237.0625, + "completions/mean_terminated_length": 1219.533447265625, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.5876469117279319, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.461321113036791, + "kl": 0.019134521484375, + "learning_rate": 4.914417150442512e-07, + "loss": -0.0134, + "num_tokens": 106321433.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9944944381713867, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01038684416429495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11643990634292298, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1198.625, + "completions/mean_terminated_length": 1178.533447265625, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.5878969742435609, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.388360182121188, + "kl": 0.0177459716796875, + "learning_rate": 4.910522692551612e-07, + "loss": 0.0437, + "num_tokens": 106365603.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.010790467262268, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010282676020010324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025280176939637255, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1275.375, + "completions/mean_terminated_length": 1200.5, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.5881470367591898, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.716583056433274, + "kl": 0.0178985595703125, + "learning_rate": 4.906628683822822e-07, + "loss": 0.0084, + "num_tokens": 106424017.0, + "reward": 0.0, + "reward_std": 0.8479743003845215, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06926453266238154, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09258409115516898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1326.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1079.0, + "completions/mean_terminated_length": 1079.0, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.5883970992748188, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9441891882553866, + "kl": 0.0146331787109375, + "learning_rate": 4.902735127223251e-07, + "loss": 0.0368, + "num_tokens": 106470945.0, + "reward": 0.0, + "reward_std": 0.7976610660552979, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0353971318104709, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06502572997175982, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1282.1875, + "completions/mean_terminated_length": 1267.666748046875, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.5886471617904476, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.868453446842013, + "kl": 0.01220703125, + "learning_rate": 4.898842025719659e-07, + "loss": -0.0123, + "num_tokens": 106520580.0, + "reward": 0.0, + "reward_std": 0.8940305709838867, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02910493343019158, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06310329466672525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1231.6875, + "completions/mean_terminated_length": 1213.800048828125, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.5888972243060765, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0692192530350106, + "kl": 0.01690673828125, + "learning_rate": 4.894949382278458e-07, + "loss": 0.0256, + "num_tokens": 106572279.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8699038624763489, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11325445768962623, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09502597311045116, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125757, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1376.8125, + "completions/mean_terminated_length": 1302.9000244140625, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.5891472868217055, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5217946061181307, + "kl": 0.0125579833984375, + "learning_rate": 4.891057199865715e-07, + "loss": -0.0381, + "num_tokens": 106617100.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6105935573577881, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029587688227346892, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09255334394673266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1271.0, + "completions/max_terminated_length": 1271.0, + "completions/mean_length": 1080.0625, + "completions/mean_terminated_length": 1080.0625, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.5893973493373343, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7989422663166508, + "kl": 0.0122222900390625, + "learning_rate": 4.887165481447143e-07, + "loss": -0.0098, + "num_tokens": 106656501.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5585763454437256, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04651117362271162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17538767239072559, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1482.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1066.5625, + "completions/mean_terminated_length": 1066.5625, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.5896474118529632, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.136494524829353, + "kl": 0.017059326171875, + "learning_rate": 4.883274229988101e-07, + "loss": 0.0259, + "num_tokens": 106700198.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6926990747451782, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16800356598886396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11966865935917523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1118.0, + "completions/max_terminated_length": 1118.0, + "completions/mean_length": 884.375, + "completions/mean_terminated_length": 884.375, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.5898974743685922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6089452392867147, + "kl": 0.020721435546875, + "learning_rate": 4.879383448453593e-07, + "loss": -0.0034, + "num_tokens": 106730204.0, + "reward": 0.0, + "reward_std": 0.6878067255020142, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019493466549738957, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06628260708175679, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1097.75, + "completions/mean_terminated_length": 1004.923095703125, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.5901475368842211, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1295478225630986, + "kl": 0.019744873046875, + "learning_rate": 4.875493139808268e-07, + "loss": -0.066, + "num_tokens": 106769536.0, + "reward": -2.421438694000244e-08, + "reward_std": 1.0566116571426392, + "rewards/wordcountpos_reward_GEOBench/mean": -2.421438694000244e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010379814455377397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05229233093284781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1376.9375, + "completions/mean_terminated_length": 1218.71435546875, + "completions/min_length": 1153.0, + "completions/min_terminated_length": 1153.0, + "epoch": 0.5903975993998499, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8809245280719704, + "kl": 0.0145111083984375, + "learning_rate": 4.87160330701641e-07, + "loss": 0.0158, + "num_tokens": 106823623.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9902392625808716, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.036052092756361884, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0989735754465342, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1192.875, + "completions/mean_terminated_length": 1122.0, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.5906476619154789, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.54222201244384, + "kl": 0.019439697265625, + "learning_rate": 4.867713953041941e-07, + "loss": -0.059, + "num_tokens": 106876517.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9885536432266235, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03362486390353074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061867958998612256, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 981.625, + "completions/mean_terminated_length": 981.625, + "completions/min_length": 722.0, + "completions/min_terminated_length": 722.0, + "epoch": 0.5908977244311078, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.541487479025352, + "kl": 0.01959228515625, + "learning_rate": 4.863825080848422e-07, + "loss": 0.0621, + "num_tokens": 106913743.0, + "reward": 0.0, + "reward_std": 0.4741396903991699, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1949644905298815, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28409618078596355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1095.0625, + "completions/mean_terminated_length": 1095.0625, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.5911477869467366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1102650508015914, + "kl": 0.0123138427734375, + "learning_rate": 4.859936693399044e-07, + "loss": -0.0396, + "num_tokens": 106957088.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4989186227321625, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005947638859701947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09936162472261904, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1311.0, + "completions/mean_terminated_length": 1225.0909423828125, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.5913978494623656, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.934767477615349, + "kl": 0.01678466796875, + "learning_rate": 4.856048793656627e-07, + "loss": 0.0091, + "num_tokens": 107012192.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8739602565765381, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07611459922309866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09330237607626106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1307.625, + "completions/mean_terminated_length": 1220.181884765625, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.5916479119779945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1659387813610618, + "kl": 0.01739501953125, + "learning_rate": 4.85216138458362e-07, + "loss": -0.0372, + "num_tokens": 107068122.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.035326600074768, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03635283427242729, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.078854707051858, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1464.625, + "completions/mean_terminated_length": 1358.5, + "completions/min_length": 1282.0, + "completions/min_terminated_length": 1282.0, + "epoch": 0.5918979744936234, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5788024903344944, + "kl": 0.0145721435546875, + "learning_rate": 4.848274469142107e-07, + "loss": 0.0226, + "num_tokens": 107134092.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8757888078689575, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06382428746322007, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05388558948831619, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1165.6875, + "completions/mean_terminated_length": 1143.4000244140625, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.5921480370092523, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.246090167565752, + "kl": 0.015167236328125, + "learning_rate": 4.844388050293781e-07, + "loss": 0.0218, + "num_tokens": 107183303.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9864743947982788, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07897434989186194, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10429916975865149, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503405, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1065.5, + "completions/mean_terminated_length": 1036.533447265625, + "completions/min_length": 228.0, + "completions/min_terminated_length": 228.0, + "epoch": 0.5923980995248812, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2886592240598684, + "kl": 0.0142974853515625, + "learning_rate": 4.840502130999964e-07, + "loss": -0.0175, + "num_tokens": 107235847.0, + "reward": 0.0, + "reward_std": 0.6742314696311951, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1145.0, + "completions/max_terminated_length": 1145.0, + "completions/mean_length": 968.875, + "completions/mean_terminated_length": 968.875, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.5926481620405101, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0962353575442556, + "kl": 0.0154876708984375, + "learning_rate": 4.836616714221603e-07, + "loss": 0.0279, + "num_tokens": 107265437.0, + "reward": 0.0, + "reward_std": 0.9823238849639893, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02750371964861968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07853495900221953, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1152.0, + "completions/mean_length": 920.625, + "completions/mean_terminated_length": 882.0000610351562, + "completions/min_length": 529.0, + "completions/min_terminated_length": 529.0, + "epoch": 0.592898224556139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.337663245183706, + "kl": 0.0167694091796875, + "learning_rate": 4.832731802919253e-07, + "loss": -0.0301, + "num_tokens": 107295519.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0225204229354858, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02435671276018516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0537425891074909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1156.25, + "completions/mean_terminated_length": 1156.25, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.5931482870717679, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3338135724232014, + "kl": 0.013458251953125, + "learning_rate": 4.828847400053085e-07, + "loss": -0.058, + "num_tokens": 107325683.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0593135356903076, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012252145126998024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08992019835841927, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 957.25, + "completions/mean_terminated_length": 957.25, + "completions/min_length": 583.0, + "completions/min_terminated_length": 583.0, + "epoch": 0.5933983495873969, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2208043995399085, + "kl": 0.018402099609375, + "learning_rate": 4.824963508582888e-07, + "loss": -0.0376, + "num_tokens": 107351455.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9178029298782349, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08825350193094735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062244353361874845, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1060.4375, + "completions/mean_terminated_length": 913.9166870117188, + "completions/min_length": 550.0, + "completions/min_terminated_length": 550.0, + "epoch": 0.5936484121030258, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5932059752904406, + "kl": 0.02178955078125, + "learning_rate": 4.821080131468057e-07, + "loss": -0.0544, + "num_tokens": 107398902.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9634152054786682, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04442813874105615, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10008469069076698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1034.875, + "completions/mean_terminated_length": 1034.875, + "completions/min_length": 714.0, + "completions/min_terminated_length": 714.0, + "epoch": 0.5938984746186546, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7960840727128495, + "kl": 0.0533447265625, + "learning_rate": 4.817197271667598e-07, + "loss": -0.0596, + "num_tokens": 107436628.0, + "reward": 0.0, + "reward_std": 0.5131012797355652, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10482474706538479, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10233087268748575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970787, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1252.0, + "completions/mean_terminated_length": 1004.0, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.5941485371342836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0162060174612484, + "kl": 0.0162506103515625, + "learning_rate": 4.813314932140118e-07, + "loss": 0.0092, + "num_tokens": 107494572.0, + "reward": 0.0, + "reward_std": 0.9068087339401245, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02144072488413188, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0951768073409179, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 908.5625, + "completions/mean_terminated_length": 908.5625, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.5943985996499125, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9892800582137653, + "kl": 0.0198974609375, + "learning_rate": 4.809433115843834e-07, + "loss": -0.0176, + "num_tokens": 107522189.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0432097911834717, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06427039876907814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06637818242313315, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16238956361284543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1164.5, + "completions/mean_terminated_length": 1087.076904296875, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.5946486621655414, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3173697443172334, + "kl": 0.01788330078125, + "learning_rate": 4.805551825736559e-07, + "loss": 0.0273, + "num_tokens": 107576149.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8733838796615601, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1219.0, + "completions/max_terminated_length": 1219.0, + "completions/mean_length": 1096.125, + "completions/mean_terminated_length": 1096.125, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.5948987246811703, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1406354306007342, + "kl": 0.00699615478515625, + "learning_rate": 4.801671064775706e-07, + "loss": -0.0043, + "num_tokens": 107619639.0, + "reward": 0.0, + "reward_std": 0.7952955961227417, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013433707820695766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1423625312466326, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1245.875, + "completions/mean_terminated_length": 1161.166748046875, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.5951487871967992, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.02126177311834, + "kl": 0.009521484375, + "learning_rate": 4.797790835918289e-07, + "loss": -0.0262, + "num_tokens": 107673757.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0393929481506348, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014695188262843278, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05627752608531525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 978.3125, + "completions/mean_terminated_length": 978.3125, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.5953988497124281, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2431193889546432, + "kl": 0.00909423828125, + "learning_rate": 4.79391114212091e-07, + "loss": 0.0197, + "num_tokens": 107699162.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9891939163208008, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008635256432318397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07802305592732554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1167.625, + "completions/mean_terminated_length": 1167.625, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.595648912228057, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.029556442133372, + "kl": 0.01666259765625, + "learning_rate": 4.79003198633977e-07, + "loss": 0.0242, + "num_tokens": 107745564.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4983184337615967, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027532140981948294, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21011809478603286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1064.0, + "completions/mean_length": 1148.0, + "completions/mean_terminated_length": 796.0, + "completions/min_length": 566.0, + "completions/min_terminated_length": 566.0, + "epoch": 0.5958989747436859, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7487970449677674, + "kl": 0.01263427734375, + "learning_rate": 4.786153371530652e-07, + "loss": -0.0735, + "num_tokens": 107785916.0, + "reward": 0.0, + "reward_std": 1.0582702159881592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05835706615129801, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05893698706208051, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590965, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1236.0, + "completions/mean_length": 1259.0, + "completions/mean_terminated_length": 1071.5555419921875, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.5961490372593148, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4769865458984084, + "kl": 0.0141448974609375, + "learning_rate": 4.782275300648938e-07, + "loss": 0.0063, + "num_tokens": 107832948.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8939868807792664, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02792502506153363, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07594316125124069, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.22558565754251508, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1190.0, + "completions/mean_length": 988.5625, + "completions/mean_terminated_length": 954.4667358398438, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.5963990997749438, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6652079938262783, + "kl": 0.02056884765625, + "learning_rate": 4.778397776649585e-07, + "loss": -0.0131, + "num_tokens": 107866117.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9433603286743164, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025671144505095177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07276101006627535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1232.6875, + "completions/mean_terminated_length": 1072.300048828125, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.5966491622905726, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6396986917211818, + "kl": 0.010589599609375, + "learning_rate": 4.774520802487139e-07, + "loss": -0.003, + "num_tokens": 107909472.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9499098062515259, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13423871331171724, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22405924458600546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1412.6875, + "completions/mean_terminated_length": 1220.5999755859375, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.5968992248062015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8869349470416754, + "kl": 0.0154876708984375, + "learning_rate": 4.77064438111572e-07, + "loss": 0.0053, + "num_tokens": 107969059.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7437119483947754, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026793839390022393, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11432719538441663, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1137.625, + "completions/mean_terminated_length": 920.2000122070312, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.5971492873218305, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3786426939914613, + "kl": 0.01641845703125, + "learning_rate": 4.7667685154890415e-07, + "loss": -0.0177, + "num_tokens": 108013165.0, + "reward": 0.0, + "reward_std": 0.7719549536705017, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00538769440508907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08026903972191064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 844.5625, + "completions/mean_terminated_length": 844.5625, + "completions/min_length": 641.0, + "completions/min_terminated_length": 641.0, + "epoch": 0.5973993498374593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.229929115832657, + "kl": 0.0114898681640625, + "learning_rate": 4.762893208560379e-07, + "loss": -0.0413, + "num_tokens": 108040062.0, + "reward": 0.0, + "reward_std": 0.8782192468643188, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06872770578237423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06266592591989395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13977495139343474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1296.875, + "completions/mean_terminated_length": 1093.75, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.5976494123530883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.164195387754739, + "kl": 0.016632080078125, + "learning_rate": 4.7590184632825856e-07, + "loss": -0.0102, + "num_tokens": 108088588.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8530811667442322, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015261937132340704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08491193077678315, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13109227736669002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 1075.75, + "completions/mean_terminated_length": 1047.4666748046875, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.5978994748687172, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4722745379393865, + "kl": 0.014251708984375, + "learning_rate": 4.7551442826080925e-07, + "loss": -0.001, + "num_tokens": 108135552.0, + "reward": 0.0, + "reward_std": 0.537696123123169, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026166901339015614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10040023930291397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 941.25, + "completions/mean_terminated_length": 904.0000610351562, + "completions/min_length": 485.0, + "completions/min_terminated_length": 485.0, + "epoch": 0.5981495373843461, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429535151720028, + "kl": 0.0180511474609375, + "learning_rate": 4.751270669488895e-07, + "loss": -0.0431, + "num_tokens": 108163996.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0575636625289917, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08353132134856697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04981768089703139, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1272.3125, + "completions/mean_terminated_length": 1239.7857666015625, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.598399599899975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1749693016120872, + "kl": 0.0144500732421875, + "learning_rate": 4.747397626876557e-07, + "loss": -0.0328, + "num_tokens": 108215689.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0178636312484741, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.094301897446163, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0669828137838621, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05374838498865701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1031.375, + "completions/mean_terminated_length": 1031.375, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.5986496624156039, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.748009573232553, + "kl": 0.018951416015625, + "learning_rate": 4.743525157722208e-07, + "loss": 0.012, + "num_tokens": 108249671.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0239605903625488, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05463412873280138, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15297420294803765, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1212.25, + "completions/mean_terminated_length": 1171.1429443359375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.5988997249312328, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.729034145298972, + "kl": 0.02410888671875, + "learning_rate": 4.739653264976543e-07, + "loss": 0.0057, + "num_tokens": 108294947.0, + "reward": 0.0, + "reward_std": 0.5211662650108337, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08488574405757304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06535341828233023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1371.8125, + "completions/mean_terminated_length": 1294.9000244140625, + "completions/min_length": 1077.0, + "completions/min_terminated_length": 1077.0, + "epoch": 0.5991497874468618, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9003199105775788, + "kl": 0.0146026611328125, + "learning_rate": 4.7357819515898136e-07, + "loss": 0.0312, + "num_tokens": 108349912.0, + "reward": 0.0, + "reward_std": 0.9882187843322754, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04733944072813496, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09194636542011397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1116.3125, + "completions/mean_terminated_length": 1061.5, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.5993998499624906, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0767974139801657, + "kl": 0.012786865234375, + "learning_rate": 4.7319112205118304e-07, + "loss": -0.0257, + "num_tokens": 108390573.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7147715091705322, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0571992383798132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2687436298006682, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 872.375, + "completions/mean_terminated_length": 872.375, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.5996499124781195, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9898458652732134, + "kl": 0.012798309326171875, + "learning_rate": 4.728041074691966e-07, + "loss": 0.0015, + "num_tokens": 108427971.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.8930596113204956, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010411557735521663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05553073177590964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1143.25, + "completions/mean_terminated_length": 1143.25, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.5998999749937485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.393414212119931, + "kl": 0.01904296875, + "learning_rate": 4.7241715170791407e-07, + "loss": -0.0029, + "num_tokens": 108468703.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9390842914581299, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1710374751041168, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11961152060176636, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1389.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 935.4375, + "completions/mean_terminated_length": 935.4375, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.6001500375093773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5374640135432753, + "kl": 0.0167999267578125, + "learning_rate": 4.7203025506218286e-07, + "loss": -0.0966, + "num_tokens": 108503318.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8086512088775635, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056797061812279455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04977555907778596, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1178.125, + "completions/mean_terminated_length": 985.0, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.6004001000250062, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.633589590372604, + "kl": 0.0095672607421875, + "learning_rate": 4.716434178268052e-07, + "loss": 0.0313, + "num_tokens": 108538768.0, + "reward": 0.0, + "reward_std": 0.9053401350975037, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07165167611952784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16639933338979743, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05962847939999442, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1296.0, + "completions/max_terminated_length": 1296.0, + "completions/mean_length": 852.5, + "completions/mean_terminated_length": 852.5, + "completions/min_length": 670.0, + "completions/min_terminated_length": 670.0, + "epoch": 0.6006501625406352, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5958449498303215, + "kl": 0.0173797607421875, + "learning_rate": 4.7125664029653844e-07, + "loss": 0.0013, + "num_tokens": 108577752.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5145223140716553, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.044845746970981246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056946730467153685, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1272.5, + "completions/mean_terminated_length": 1257.3333740234375, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.6009002250562641, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.102131035922475, + "kl": 0.00875091552734375, + "learning_rate": 4.7086992276609425e-07, + "loss": 0.0095, + "num_tokens": 108624064.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9948878288269043, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11750903830847373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05034309689821161, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1251.9375, + "completions/mean_terminated_length": 1235.4000244140625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.6011502875718929, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.846307207035103, + "kl": 0.0124359130859375, + "learning_rate": 4.704832655301382e-07, + "loss": -0.0075, + "num_tokens": 108673407.0, + "reward": 0.0, + "reward_std": 0.9972224831581116, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12928531153076087, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10848488588338111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1174.6875, + "completions/mean_terminated_length": 1153.0, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.6014003500875219, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2134386605130874, + "kl": 0.0148162841796875, + "learning_rate": 4.7009666888329045e-07, + "loss": 0.015, + "num_tokens": 108710442.0, + "reward": 0.0, + "reward_std": 0.903186559677124, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06219877662666809, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02771746440266936, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1021.6875, + "completions/mean_terminated_length": 1021.6875, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.6016504126031508, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.808242839287028, + "kl": 0.020538330078125, + "learning_rate": 4.697101331201251e-07, + "loss": -0.0161, + "num_tokens": 108755309.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.068321704864502, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05195795104787923, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0673830649484039, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1319.0625, + "completions/mean_terminated_length": 1236.8182373046875, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.6019004751187796, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7418774481050563, + "kl": 0.0131988525390625, + "learning_rate": 4.693236585351691e-07, + "loss": -0.034, + "num_tokens": 108807766.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8470091819763184, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0788425958950569, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10235444580340576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1169.875, + "completions/mean_terminated_length": 1147.86669921875, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.6021505376344086, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.050064239276001, + "kl": 0.0147705078125, + "learning_rate": 4.6893724542290325e-07, + "loss": -0.0356, + "num_tokens": 108851612.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0447235107421875, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08136838892059652, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09982561941017644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1255.5625, + "completions/mean_terminated_length": 1239.2667236328125, + "completions/min_length": 1069.0, + "completions/min_terminated_length": 1069.0, + "epoch": 0.6024006001500375, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2213199447757668, + "kl": 0.0177001953125, + "learning_rate": 4.685508940777617e-07, + "loss": -0.0127, + "num_tokens": 108901221.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.992063581943512, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08882912122344461, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12450365106872067, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1097.125, + "completions/mean_terminated_length": 1039.571533203125, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.6026506626656665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6134390366977063, + "kl": 0.01739501953125, + "learning_rate": 4.6816460479413135e-07, + "loss": -0.0346, + "num_tokens": 108943743.0, + "reward": 0.0, + "reward_std": 0.8130314350128174, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1103264771073462, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19877085268794428, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1276.9375, + "completions/mean_terminated_length": 1175.5455322265625, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.6029007251812953, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.614224997268044, + "kl": 0.015960693359375, + "learning_rate": 4.6777837786635156e-07, + "loss": 0.0546, + "num_tokens": 108981486.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.802148699760437, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053497516908562276, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09931907596852908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13924399049470282, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1212.125, + "completions/mean_terminated_length": 988.2222290039062, + "completions/min_length": 263.0, + "completions/min_terminated_length": 263.0, + "epoch": 0.6031507876969242, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4865281823455287, + "kl": 0.0153045654296875, + "learning_rate": 4.673922135887146e-07, + "loss": -0.0694, + "num_tokens": 109027976.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5907195806503296, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005497326951452762, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03374750328344481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14700718047466632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1276.8125, + "completions/mean_terminated_length": 1244.9285888671875, + "completions/min_length": 443.0, + "completions/min_terminated_length": 443.0, + "epoch": 0.6034008502125532, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9399881697699695, + "kl": 0.010467529296875, + "learning_rate": 4.6700611225546485e-07, + "loss": 0.0426, + "num_tokens": 109092141.0, + "reward": 0.0, + "reward_std": 0.4815070927143097, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05045866720020658, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07499186630068386, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1593970119149271, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1228.5625, + "completions/mean_terminated_length": 1189.7857666015625, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.603650912728182, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.645687754142111, + "kl": 0.0101776123046875, + "learning_rate": 4.6662007416079865e-07, + "loss": -0.0165, + "num_tokens": 109137422.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8951758742332458, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10997594735721333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15994291647964823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797316, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1200.0625, + "completions/mean_terminated_length": 1200.0625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.6039009752438109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.814963890924666, + "kl": 0.0123443603515625, + "learning_rate": 4.6623409959886395e-07, + "loss": 0.0557, + "num_tokens": 109190943.0, + "reward": 0.0, + "reward_std": 1.0196541547775269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030876194025745186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10288807088144146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590965, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1131.3125, + "completions/mean_terminated_length": 1131.3125, + "completions/min_length": 988.0, + "completions/min_terminated_length": 988.0, + "epoch": 0.6041510377594399, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3718727090632195, + "kl": 0.0107421875, + "learning_rate": 4.658481888637609e-07, + "loss": 0.023, + "num_tokens": 109238668.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6953632235527039, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07496560590106328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08284740341232762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1269.3125, + "completions/mean_terminated_length": 1130.9000244140625, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.6044011002750688, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6364245476050474, + "kl": 0.014251708984375, + "learning_rate": 4.654623422495405e-07, + "loss": 0.0192, + "num_tokens": 109277729.0, + "reward": 0.0, + "reward_std": 1.0614835023880005, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1180081604209045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1164.6875, + "completions/mean_terminated_length": 1164.6875, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.6046511627906976, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.155271088128753, + "kl": 0.0155029296875, + "learning_rate": 4.650765600502049e-07, + "loss": 0.0005, + "num_tokens": 109317612.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7380473613739014, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02640326181240253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05475036925499433, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1482.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1148.375, + "completions/mean_terminated_length": 1148.375, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.6049012253063266, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.292802972680513, + "kl": 0.0198974609375, + "learning_rate": 4.6469084255970756e-07, + "loss": -0.0473, + "num_tokens": 109357634.0, + "reward": 0.0, + "reward_std": 1.063892126083374, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025073224831898888, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06574208715771167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1335.625, + "completions/mean_terminated_length": 1312.1429443359375, + "completions/min_length": 1048.0, + "completions/min_terminated_length": 1048.0, + "epoch": 0.6051512878219555, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7782664590628476, + "kl": 0.0161895751953125, + "learning_rate": 4.6430519007195235e-07, + "loss": -0.0189, + "num_tokens": 109410996.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7071110606193542, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19971548022204233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21337101429946412, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1242.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 966.5, + "completions/mean_terminated_length": 966.5, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.6054013503375844, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.733545177971152, + "kl": 0.0139312744140625, + "learning_rate": 4.639196028807936e-07, + "loss": 0.0055, + "num_tokens": 109462972.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0133249759674072, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022136546285825367, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044915088737620214, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1238.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 1015.875, + "completions/mean_terminated_length": 1015.875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.6056514128532133, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.301040732332543, + "kl": 0.0130767822265625, + "learning_rate": 4.6353408128003555e-07, + "loss": 0.0486, + "num_tokens": 109507498.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0664012432098389, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15045187835727306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06294989529171383, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 1002.9375, + "completions/mean_terminated_length": 969.800048828125, + "completions/min_length": 617.0, + "completions/min_terminated_length": 617.0, + "epoch": 0.6059014753688422, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.911029528927042, + "kl": 0.019989013671875, + "learning_rate": 4.6314862556343346e-07, + "loss": 0.0145, + "num_tokens": 109561489.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9746617078781128, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07595796882562125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056895322294129615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1232.8125, + "completions/mean_terminated_length": 1171.1539306640625, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.6061515378844711, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.330397068226056, + "kl": 0.011249542236328125, + "learning_rate": 4.627632360246916e-07, + "loss": 0.0032, + "num_tokens": 109601038.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.790974497795105, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023487749666768025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08517811776945813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1257.4375, + "completions/mean_terminated_length": 1201.4615478515625, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.6064016004001, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4518132188929993, + "kl": 0.00878143310546875, + "learning_rate": 4.6237791295746353e-07, + "loss": 0.0393, + "num_tokens": 109641677.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5931657552719116, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04441618377004373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09052377814882799, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 707.3125, + "completions/mean_terminated_length": 707.3125, + "completions/min_length": 433.0, + "completions/min_terminated_length": 433.0, + "epoch": 0.6066516629157289, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7588278183541024, + "kl": 0.01078033447265625, + "learning_rate": 4.619926566553534e-07, + "loss": -0.0511, + "num_tokens": 109664490.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0669152736663818, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.039537682997896964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06433146827515071, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1283.0, + "completions/max_terminated_length": 1283.0, + "completions/mean_length": 1092.625, + "completions/mean_terminated_length": 1092.625, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.6069017254313578, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.696677238433555, + "kl": 0.01171875, + "learning_rate": 4.61607467411913e-07, + "loss": -0.0018, + "num_tokens": 109708268.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0344769954681396, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11287975983729356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20566877798372427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1253.75, + "completions/mean_terminated_length": 1171.666748046875, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.6071517879469868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.990329104176829, + "kl": 0.018585205078125, + "learning_rate": 4.612223455206442e-07, + "loss": -0.0444, + "num_tokens": 109753624.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9478222131729126, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02568046127196166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043781260184482654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1263.0, + "completions/mean_length": 1317.0, + "completions/mean_terminated_length": 1134.0, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.6074018504626156, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.170368068969798, + "kl": 0.010772705078125, + "learning_rate": 4.608372912749967e-07, + "loss": -0.0193, + "num_tokens": 109810832.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.818994402885437, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08513596202523856, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11738032762743164, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1210.6875, + "completions/mean_terminated_length": 1114.25, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.6076519129782446, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5275433758919896, + "kl": 0.02130126953125, + "learning_rate": 4.604523049683694e-07, + "loss": -0.0215, + "num_tokens": 109867027.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9782613515853882, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002857073431966247, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0408040207772292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15244914148902491, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1455.625, + "completions/mean_terminated_length": 1398.571533203125, + "completions/min_length": 1274.0, + "completions/min_terminated_length": 1274.0, + "epoch": 0.6079019754938735, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7457795078666596, + "kl": 0.0165557861328125, + "learning_rate": 4.600673868941088e-07, + "loss": -0.0011, + "num_tokens": 109925445.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0142372846603394, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0330094040973462, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13681722701793664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0926962382871743, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1074.125, + "completions/mean_terminated_length": 1074.125, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.6081520380095023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2534920878762827, + "kl": 0.018524169921875, + "learning_rate": 4.5968253734550976e-07, + "loss": -0.0095, + "num_tokens": 109970167.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0399224758148193, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00998129906781449, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.020780845712314158, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1142.75, + "completions/mean_terminated_length": 1091.71435546875, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.6084021005251313, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.557353772534241, + "kl": 0.0223388671875, + "learning_rate": 4.592977566158149e-07, + "loss": 0.0286, + "num_tokens": 110012075.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8403158783912659, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05773266678882624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09539326857672237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1006.0625, + "completions/mean_terminated_length": 973.1333618164062, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.6086521630407602, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5310955510937365, + "kl": 0.017059326171875, + "learning_rate": 4.5891304499821435e-07, + "loss": 0.0255, + "num_tokens": 110049116.0, + "reward": 0.0, + "reward_std": 0.9939632415771484, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024177725673717433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07001835446861109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1170.0625, + "completions/mean_terminated_length": 1020.0909423828125, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.6089022255563891, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.11638222133952, + "kl": 0.0139007568359375, + "learning_rate": 4.585284027858455e-07, + "loss": 0.0035, + "num_tokens": 110086813.0, + "reward": 0.0, + "reward_std": 0.8528337478637695, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0027220479785507496, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01348403969606977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12700247883261817, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1221.5625, + "completions/mean_terminated_length": 1181.7857666015625, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.609152288072018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7453206953638887, + "kl": 0.01332855224609375, + "learning_rate": 4.5814383027179283e-07, + "loss": -0.0186, + "num_tokens": 110130726.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9506831765174866, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31410730650527696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1150.0, + "completions/mean_length": 1169.375, + "completions/mean_terminated_length": 971.0, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.6094023505876469, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8185343564135796, + "kl": 0.01666259765625, + "learning_rate": 4.577593277490881e-07, + "loss": 0.0305, + "num_tokens": 110173092.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9916330575942993, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03823911586698196, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14075523646898877, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1329.75, + "completions/mean_terminated_length": 1227.5999755859375, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.6096524131032758, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.63099093197892, + "kl": 0.010711669921875, + "learning_rate": 4.573748955107093e-07, + "loss": 0.0245, + "num_tokens": 110220032.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9337387681007385, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007413344362457924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03098660044145351, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1120.4375, + "completions/mean_terminated_length": 1032.84619140625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.6099024756189048, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7097288344121297, + "kl": 0.0222015380859375, + "learning_rate": 4.5699053384958076e-07, + "loss": -0.0125, + "num_tokens": 110266551.0, + "reward": 0.0, + "reward_std": 1.0134000778198242, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030245727232489764, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03363671209271921, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1328.4375, + "completions/mean_terminated_length": 1271.25, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.6101525381345336, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6711976322446316, + "kl": 0.01416778564453125, + "learning_rate": 4.5660624305857375e-07, + "loss": 0.0162, + "num_tokens": 110317254.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.33846214413642883, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.25322220596982475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1697216927016707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1155.25, + "completions/mean_terminated_length": 1132.2667236328125, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.6104026006501625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3989451130866946, + "kl": 0.017822265625, + "learning_rate": 4.562220234305049e-07, + "loss": -0.0119, + "num_tokens": 110358418.0, + "reward": 0.0, + "reward_std": 0.6337206363677979, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014475035487706314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04438623849742723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1538999193800477, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1256.0, + "completions/mean_terminated_length": 1239.7333984375, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.6106526631657915, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1877935650535805, + "kl": 0.016143798828125, + "learning_rate": 4.558378752581367e-07, + "loss": 0.048, + "num_tokens": 110414154.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.065034031867981, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1256150157358754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16167111838045156, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1262.5, + "completions/mean_terminated_length": 1183.3333740234375, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.6109027256814203, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.441669168440585, + "kl": 0.01019287109375, + "learning_rate": 4.5545379883417703e-07, + "loss": 0.0234, + "num_tokens": 110465490.0, + "reward": 0.0, + "reward_std": 0.9043567180633545, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3252351945074197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.616478990879241, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1312.5625, + "completions/mean_terminated_length": 1200.0999755859375, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.6111527881970492, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1023381739085254, + "kl": 0.0143890380859375, + "learning_rate": 4.5506979445128015e-07, + "loss": 0.0232, + "num_tokens": 110513251.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8145723342895508, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005487275581619445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04336230280241563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1593970119149271, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1275.5625, + "completions/mean_terminated_length": 1101.0, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.6114028507126782, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3562102451106632, + "kl": 0.0124664306640625, + "learning_rate": 4.546858624020439e-07, + "loss": -0.0446, + "num_tokens": 110555884.0, + "reward": 0.0, + "reward_std": 0.5981509685516357, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.057084396036578736, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10298014615073019, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1175.8125, + "completions/mean_terminated_length": 1067.75, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.6116529132283071, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1706352082616065, + "kl": 0.019073486328125, + "learning_rate": 4.543020029790119e-07, + "loss": 0.0042, + "num_tokens": 110612969.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8677204251289368, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005449258195615479, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01970441309110057, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1170.3125, + "completions/mean_terminated_length": 1060.416748046875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.6119029757439359, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9443366557073807, + "kl": 0.0148162841796875, + "learning_rate": 4.5391821647467257e-07, + "loss": 0.0067, + "num_tokens": 110668038.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5630743503570557, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023732504759249817, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.036933483268017445, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1199.8125, + "completions/mean_terminated_length": 1130.5384521484375, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.6121530382595649, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.835473854905563, + "kl": 0.0131683349609375, + "learning_rate": 4.5353450318145816e-07, + "loss": 0.0002, + "num_tokens": 110716627.0, + "reward": 0.0, + "reward_std": 0.8072731494903564, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01662608398991843, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07382583747530493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13326387079497304, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1280.375, + "completions/mean_terminated_length": 1148.5999755859375, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.6124031007751938, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.087299788642021, + "kl": 0.01470947265625, + "learning_rate": 4.5315086339174555e-07, + "loss": -0.0104, + "num_tokens": 110753481.0, + "reward": 0.0, + "reward_std": 0.9636306762695312, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07263797142497679, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14096746185309886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1156.5625, + "completions/mean_terminated_length": 1156.5625, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.6126531632908228, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1451075204248062, + "kl": 0.0220947265625, + "learning_rate": 4.5276729739785544e-07, + "loss": 0.0283, + "num_tokens": 110807538.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6968389749526978, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07319280617431509, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15502371377841592, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1034.1875, + "completions/mean_terminated_length": 1003.1333618164062, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.6129032258064516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1702089216695066, + "kl": 0.0163421630859375, + "learning_rate": 4.5238380549205247e-07, + "loss": -0.0105, + "num_tokens": 110843517.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8899219036102295, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08391555293081254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08477541788172077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1197.6875, + "completions/mean_terminated_length": 1096.916748046875, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.6131532883220805, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5739857378765203, + "kl": 0.015777587890625, + "learning_rate": 4.5200038796654474e-07, + "loss": -0.0441, + "num_tokens": 110877416.0, + "reward": 0.0, + "reward_std": 0.7679810523986816, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.047663483758477634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3093555258731047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1332.5625, + "completions/mean_terminated_length": 1232.0999755859375, + "completions/min_length": 1026.0, + "completions/min_terminated_length": 1026.0, + "epoch": 0.6134033508377095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.01958468947453, + "kl": 0.01690673828125, + "learning_rate": 4.516170451134834e-07, + "loss": -0.0451, + "num_tokens": 110924105.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0507687330245972, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06497712470070634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06345709721248052, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1204.6875, + "completions/mean_terminated_length": 1185.0001220703125, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.6136534133533383, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2784535540004054, + "kl": 0.019775390625, + "learning_rate": 4.5123377722496336e-07, + "loss": -0.028, + "num_tokens": 110969916.0, + "reward": 0.0, + "reward_std": 0.9188560843467712, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.051676714832332965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10613566319663746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1150.0, + "completions/mean_terminated_length": 1150.0, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.6139034758689672, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.508835072913967, + "kl": 0.01800537109375, + "learning_rate": 4.5085058459302173e-07, + "loss": -0.0236, + "num_tokens": 111008676.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0328369140625, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08858573345149472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09804032819609963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 985.6875, + "completions/mean_terminated_length": 985.6875, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.6141535383845962, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.276357735236556, + "kl": 0.0074462890625, + "learning_rate": 4.504674675096387e-07, + "loss": 0.009, + "num_tokens": 111045487.0, + "reward": -5.960464477539063e-08, + "reward_std": 1.044190526008606, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09838752344300944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09916982165204266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1450.75, + "completions/mean_terminated_length": 1342.4000244140625, + "completions/min_length": 1198.0, + "completions/min_terminated_length": 1198.0, + "epoch": 0.614403600900225, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.045024196286179, + "kl": 0.01727294921875, + "learning_rate": 4.500844262667365e-07, + "loss": -0.0036, + "num_tokens": 111102979.0, + "reward": 0.0, + "reward_std": 0.8164104223251343, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09945664297168823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20246388612144364, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 1050.4375, + "completions/mean_terminated_length": 1050.4375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.6146536634158539, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.524442733735685, + "kl": 0.016357421875, + "learning_rate": 4.4970146115618013e-07, + "loss": -0.0144, + "num_tokens": 111147370.0, + "reward": 0.0, + "reward_std": 0.9730882048606873, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.042537725495278886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16012623728406197, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1438.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1090.875, + "completions/mean_terminated_length": 1090.875, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.6149037259314829, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.075601613493458, + "kl": 0.01108551025390625, + "learning_rate": 4.4931857246977634e-07, + "loss": 0.0379, + "num_tokens": 111191088.0, + "reward": 0.0, + "reward_std": 0.8471033573150635, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21442453864765962, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22027415318831348, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1264.0, + "completions/mean_terminated_length": 1264.0, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.6151537884471118, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.613804391762841, + "kl": 0.011749267578125, + "learning_rate": 4.4893576049927306e-07, + "loss": 0.0107, + "num_tokens": 111232968.0, + "reward": 0.0, + "reward_std": 1.0002063512802124, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06161879079820168, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07349500464608731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.046943622609505783, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1254.1875, + "completions/mean_terminated_length": 1008.375, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.6154038509627406, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.690266284199224, + "kl": 0.0138397216796875, + "learning_rate": 4.485530255363609e-07, + "loss": -0.0375, + "num_tokens": 111277683.0, + "reward": 0.0, + "reward_std": 0.8200098276138306, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1105190783573982, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08111972211105453, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 998.125, + "completions/mean_terminated_length": 998.125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.6156539134783696, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.121143275158241, + "kl": 0.013031005859375, + "learning_rate": 4.481703678726708e-07, + "loss": -0.0531, + "num_tokens": 111315901.0, + "reward": 0.0, + "reward_std": 0.8772472143173218, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07550163870103044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22148808476502393, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1084.25, + "completions/mean_terminated_length": 1084.25, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.6159039759939985, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2908009793246507, + "kl": 0.00853729248046875, + "learning_rate": 4.477877877997752e-07, + "loss": 0.0227, + "num_tokens": 111357657.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.033249855041504, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052693987532909166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06513561953893661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1278.0, + "completions/max_terminated_length": 1278.0, + "completions/mean_length": 965.0, + "completions/mean_terminated_length": 965.0, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.6161540385096274, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6932433593274556, + "kl": 0.0157623291015625, + "learning_rate": 4.474052856091874e-07, + "loss": 0.0131, + "num_tokens": 111395777.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0349390506744385, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22292677620209517, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08618575298358186, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1108.0, + "completions/mean_terminated_length": 1108.0, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.6164041010252563, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.373810158441301, + "kl": 0.017547607421875, + "learning_rate": 4.470228615923617e-07, + "loss": 0.0011, + "num_tokens": 111430961.0, + "reward": 0.0, + "reward_std": 0.6298472285270691, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14183990605558755, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13424092027918286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1326.375, + "completions/mean_terminated_length": 1301.571533203125, + "completions/min_length": 1099.0, + "completions/min_terminated_length": 1099.0, + "epoch": 0.6166541635408852, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7478294136168397, + "kl": 0.0119476318359375, + "learning_rate": 4.466405160406921e-07, + "loss": 0.0208, + "num_tokens": 111484207.0, + "reward": 0.0, + "reward_std": 0.9744415283203125, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016705705941162875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03788799914661809, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1458055529095489, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1094.75, + "completions/mean_terminated_length": 1067.7333984375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.6169042260565142, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.781686669284054, + "kl": 0.0107421875, + "learning_rate": 4.4625824924551316e-07, + "loss": -0.0904, + "num_tokens": 111530099.0, + "reward": 0.0, + "reward_std": 0.8415950536727905, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009435844420336386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09660070744503153, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000306, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1072.9375, + "completions/mean_terminated_length": 1072.9375, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.617154288572143, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.756172749443982, + "kl": 0.010101318359375, + "learning_rate": 4.4587606149809976e-07, + "loss": -0.0374, + "num_tokens": 111563650.0, + "reward": 0.0, + "reward_std": 0.9918938875198364, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005347446327942141, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02753964630641517, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14700718047466632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1477.0625, + "completions/mean_terminated_length": 1316.5, + "completions/min_length": 1281.0, + "completions/min_terminated_length": 1281.0, + "epoch": 0.6174043510877719, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0182723531440274, + "kl": 0.01214599609375, + "learning_rate": 4.454939530896662e-07, + "loss": 0.0023, + "num_tokens": 111620515.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.623458981513977, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012085641927946464, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06886086669254954, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1385.875, + "completions/mean_terminated_length": 1297.111083984375, + "completions/min_length": 1133.0, + "completions/min_terminated_length": 1133.0, + "epoch": 0.6176544136034009, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3325867943112426, + "kl": 0.0120697021484375, + "learning_rate": 4.451119243113662e-07, + "loss": -0.0255, + "num_tokens": 111661009.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9701966047286987, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023251296546602192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03422187686501016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1466.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1053.375, + "completions/mean_terminated_length": 1053.375, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.6179044761190298, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6150680979059002, + "kl": 0.0171661376953125, + "learning_rate": 4.4472997545429305e-07, + "loss": -0.0012, + "num_tokens": 111708271.0, + "reward": 0.0, + "reward_std": 0.9126315116882324, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05978565561523308, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09347474187715339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1277.5625, + "completions/mean_terminated_length": 1203.416748046875, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.6181545386346586, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2238813064755103, + "kl": 0.0165252685546875, + "learning_rate": 4.443481068094792e-07, + "loss": 0.0383, + "num_tokens": 111766440.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9436094760894775, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19751857201634185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16598966256773637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1054.1875, + "completions/mean_terminated_length": 1054.1875, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.6184046011502876, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.83975797619147, + "kl": 0.032379150390625, + "learning_rate": 4.439663186678959e-07, + "loss": 0.0327, + "num_tokens": 111819755.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0391589403152466, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14118771492677332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1166478567704542, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1171.25, + "completions/mean_terminated_length": 1095.3846435546875, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.6186546636659165, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.726666444398136, + "kl": 0.0165252685546875, + "learning_rate": 4.435846113204528e-07, + "loss": 0.0013, + "num_tokens": 111862679.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7529423236846924, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002455096938031757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08191093609632819, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1204.9375, + "completions/mean_terminated_length": 1070.8182373046875, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.6189047261815454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9717122594452574, + "kl": 0.0147247314453125, + "learning_rate": 4.432029850579982e-07, + "loss": -0.0433, + "num_tokens": 111919070.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8764379024505615, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021168560852853187, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05680903969082012, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1074.25, + "completions/mean_terminated_length": 1045.86669921875, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.6191547886971743, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.068118011805008, + "kl": 0.02105712890625, + "learning_rate": 4.4282144017131906e-07, + "loss": 0.0055, + "num_tokens": 111956746.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9828945398330688, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05654223086644272, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05552578450902323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 997.5625, + "completions/mean_terminated_length": 997.5625, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.6194048512128032, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.213037459597296, + "kl": 0.0099029541015625, + "learning_rate": 4.4243997695113955e-07, + "loss": -0.0936, + "num_tokens": 112004307.0, + "reward": 0.0, + "reward_std": 0.6843645572662354, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06061108651796224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09911000489807187, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1104.1875, + "completions/mean_terminated_length": 1012.84619140625, + "completions/min_length": 499.0, + "completions/min_terminated_length": 499.0, + "epoch": 0.6196549137284321, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.39630923231499, + "kl": 0.017364501953125, + "learning_rate": 4.4205859568812175e-07, + "loss": 0.0388, + "num_tokens": 112040422.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0121484994888306, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.31061709575245156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2999979260871103, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1305.0, + "completions/max_terminated_length": 1305.0, + "completions/mean_length": 975.8125, + "completions/mean_terminated_length": 975.8125, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.619904976244061, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4893086618713447, + "kl": 0.0105133056640625, + "learning_rate": 4.416772966728659e-07, + "loss": 0.0259, + "num_tokens": 112081187.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9387719631195068, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053371095934081944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03816124684712363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1184.125, + "completions/mean_terminated_length": 1163.0667724609375, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.6201550387596899, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7241327979881746, + "kl": 0.0191650390625, + "learning_rate": 4.412960801959091e-07, + "loss": 0.0076, + "num_tokens": 112129917.0, + "reward": 0.0, + "reward_std": 0.9432554841041565, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058690233813388545, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0479580394466852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1244.0, + "completions/mean_terminated_length": 1244.0, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.6204051012753188, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5754751572189383, + "kl": 0.011383056640625, + "learning_rate": 4.409149465477253e-07, + "loss": -0.0094, + "num_tokens": 112170813.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0464900732040405, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04716804795124088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.130121528218915, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1447.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1185.0, + "completions/mean_terminated_length": 1185.0, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.6206551637909478, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5534924194644892, + "kl": 0.01922607421875, + "learning_rate": 4.4053389601872624e-07, + "loss": -0.0295, + "num_tokens": 112227373.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9711065292358398, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027622493793990437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04437893444528393, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823627, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1425.4375, + "completions/mean_terminated_length": 1329.571533203125, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.6209052263065766, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.707904653791556, + "kl": 0.0118560791015625, + "learning_rate": 4.4015292889925937e-07, + "loss": -0.0228, + "num_tokens": 112284804.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8778500556945801, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0022407220099551622, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0449382838774046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1211.375, + "completions/mean_terminated_length": 1211.375, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.6211552888222055, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8961762575460113, + "kl": 0.0181884765625, + "learning_rate": 4.397720454796091e-07, + "loss": 0.0232, + "num_tokens": 112322186.0, + "reward": 0.0, + "reward_std": 0.9799769520759583, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03258926999135995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0500606531309639, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1412.1875, + "completions/mean_terminated_length": 1219.0, + "completions/min_length": 1127.0, + "completions/min_terminated_length": 1127.0, + "epoch": 0.6214053513378345, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2284308532008925, + "kl": 0.01178741455078125, + "learning_rate": 4.393912460499957e-07, + "loss": -0.0454, + "num_tokens": 112364677.0, + "reward": 0.0, + "reward_std": 0.7828980088233948, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.001122566203231124, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022305260166148207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1106.0, + "completions/mean_length": 1409.1875, + "completions/mean_terminated_length": 1015.6666870117188, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.6216554138534633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3779708525494545, + "kl": 0.01324462890625, + "learning_rate": 4.3901053090057605e-07, + "loss": -0.0445, + "num_tokens": 112434584.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0341322422027588, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04028404919220928, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16039852916162572, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1019.625, + "completions/mean_terminated_length": 987.6000366210938, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.6219054763690923, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4667128775712306, + "kl": 0.0162353515625, + "learning_rate": 4.386299003214422e-07, + "loss": 0.0012, + "num_tokens": 112477210.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7548008561134338, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19288502235923527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3232151550475479, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 921.6875, + "completions/mean_terminated_length": 883.1333618164062, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.6221555388847212, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1397143548024564, + "kl": 0.0122222900390625, + "learning_rate": 4.382493546026219e-07, + "loss": -0.0338, + "num_tokens": 112525437.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.7898119688034058, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01203073096090308, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08393737208250217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1102.0, + "completions/mean_terminated_length": 1102.0, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.6224056014003501, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7163756833238604, + "kl": 0.0145111083984375, + "learning_rate": 4.378688940340787e-07, + "loss": 0.0248, + "num_tokens": 112570213.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0198959112167358, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01743442372313304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08814702465951343, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1346.0, + "completions/mean_terminated_length": 1276.0, + "completions/min_length": 1166.0, + "completions/min_terminated_length": 1166.0, + "epoch": 0.622655663915979, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0564450130334437, + "kl": 0.0057830810546875, + "learning_rate": 4.374885189057106e-07, + "loss": -0.0185, + "num_tokens": 112627829.0, + "reward": 0.0, + "reward_std": 0.9884312152862549, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08177064755442034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09514953512656515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1133.375, + "completions/mean_terminated_length": 1081.0, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.6229057264316079, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1189960904233756, + "kl": 0.01513671875, + "learning_rate": 4.3710822950735093e-07, + "loss": 0.0128, + "num_tokens": 112672795.0, + "reward": 0.0, + "reward_std": 1.0350239276885986, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041275146971997176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10974673190664698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1372.9375, + "completions/mean_terminated_length": 1274.111083984375, + "completions/min_length": 1146.0, + "completions/min_terminated_length": 1146.0, + "epoch": 0.6231557889472368, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.153874170974084, + "kl": 0.0114593505859375, + "learning_rate": 4.3672802612876733e-07, + "loss": -0.0018, + "num_tokens": 112716850.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9234411120414734, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015660305644042256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15503302759158633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1193.625, + "completions/mean_terminated_length": 1173.2000732421875, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.6234058514628658, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4380538843660484, + "kl": 0.0164337158203125, + "learning_rate": 4.3634790905966256e-07, + "loss": -0.0331, + "num_tokens": 112770628.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8560225963592529, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09391768750636614, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14779774096197817, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1113.0625, + "completions/mean_terminated_length": 1113.0625, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.6236559139784946, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.917786237399799, + "kl": 0.0138702392578125, + "learning_rate": 4.359678785896731e-07, + "loss": -0.041, + "num_tokens": 112803157.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0446703433990479, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03216878786790883, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05261722555597594, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1233.625, + "completions/mean_terminated_length": 1195.571533203125, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.6239059764941235, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4025964304127627, + "kl": 0.0110626220703125, + "learning_rate": 4.355879350083691e-07, + "loss": 0.0279, + "num_tokens": 112857303.0, + "reward": 0.0, + "reward_std": 0.43833452463150024, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09559601715556787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08505297723640917, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1325.5, + "completions/mean_terminated_length": 1189.77783203125, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.6241560390097525, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.721233304177927, + "kl": 0.0152587890625, + "learning_rate": 4.3520807860525534e-07, + "loss": -0.0201, + "num_tokens": 112910319.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7159320116043091, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030477538897343996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05006754289819111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1180.3125, + "completions/mean_terminated_length": 1180.3125, + "completions/min_length": 913.0, + "completions/min_terminated_length": 913.0, + "epoch": 0.6244061015253813, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8236675445133805, + "kl": 0.0111083984375, + "learning_rate": 4.348283096697698e-07, + "loss": -0.0304, + "num_tokens": 112952020.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.790416955947876, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03787156431926795, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07857507085860692, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1096.1875, + "completions/mean_terminated_length": 1096.1875, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.6246561640410102, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2049727483857957, + "kl": 0.022979736328125, + "learning_rate": 4.344486284912836e-07, + "loss": -0.0103, + "num_tokens": 112996135.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.048957347869873, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0953542596578254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11976946592281587, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 964.0, + "completions/mean_length": 1161.5625, + "completions/mean_terminated_length": 823.125, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.6249062265566392, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2992386770829385, + "kl": 0.023773193359375, + "learning_rate": 4.34069035359101e-07, + "loss": -0.0192, + "num_tokens": 113056920.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9217933416366577, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028946647782063562, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14240663446134721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1235.5, + "completions/mean_terminated_length": 1197.71435546875, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.6251562890722681, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4750962454280614, + "kl": 0.00724029541015625, + "learning_rate": 4.3368953056245995e-07, + "loss": -0.0055, + "num_tokens": 113097920.0, + "reward": 0.0, + "reward_std": 0.5036134719848633, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008403828852479395, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14530807677192692, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1049.1875, + "completions/mean_terminated_length": 1049.1875, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.6254063515878969, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2084183020260952, + "kl": 0.0110015869140625, + "learning_rate": 4.3331011439053e-07, + "loss": -0.0649, + "num_tokens": 113132667.0, + "reward": 0.0, + "reward_std": 0.9868022799491882, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07793658950907816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11975260220516151, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891873, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1006.875, + "completions/mean_terminated_length": 1006.875, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.6256564141035259, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.213121102599761, + "kl": 0.01206207275390625, + "learning_rate": 4.329307871324137e-07, + "loss": 0.0013, + "num_tokens": 113174753.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8765769004821777, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03684003423704415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09321398835078837, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1319.375, + "completions/mean_terminated_length": 1211.0, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.6259064766191548, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8024657370562522, + "kl": 0.013275146484375, + "learning_rate": 4.3255154907714596e-07, + "loss": 0.0051, + "num_tokens": 113226599.0, + "reward": 0.0, + "reward_std": 1.0015666484832764, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09971498241517474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14433142959125736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1182.75, + "completions/mean_terminated_length": 1182.75, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.6261565391347836, + "frac_reward_zero_std": 0.0, + "grad_norm": 82.63379687284758, + "kl": 0.14581298828125, + "learning_rate": 4.321724005136935e-07, + "loss": 0.0384, + "num_tokens": 113275251.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.040063738822937, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07114171264969532, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07014326591547357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1439521525445946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1102.75, + "completions/mean_terminated_length": 1102.75, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.6264066016504126, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.070642323791395, + "kl": 0.0186767578125, + "learning_rate": 4.3179334173095494e-07, + "loss": -0.0234, + "num_tokens": 113314295.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.4493151903152466, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0698847276826599, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19332153237909988, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15533714826025885, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1184.1875, + "completions/mean_terminated_length": 1163.1334228515625, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.6266566641660415, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5526129701514666, + "kl": 0.019256591796875, + "learning_rate": 4.314143730177604e-07, + "loss": 0.0418, + "num_tokens": 113364170.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.3947451710700989, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08200252358872033, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11591226359590451, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1435.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1137.0, + "completions/mean_terminated_length": 1137.0, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.6269067266816705, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6404055895726346, + "kl": 0.020355224609375, + "learning_rate": 4.310354946628716e-07, + "loss": -0.0068, + "num_tokens": 113416618.0, + "reward": 0.0, + "reward_std": 0.6128249168395996, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06312963910207728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18532172578769301, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1166.625, + "completions/mean_terminated_length": 1144.4000244140625, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.6271567891972993, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.990260632529337, + "kl": 0.0135498046875, + "learning_rate": 4.3065670695498114e-07, + "loss": -0.0091, + "num_tokens": 113453828.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0387402772903442, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03567846332147066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06923586228907701, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1359.125, + "completions/mean_terminated_length": 1326.615478515625, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.6274068517129282, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5161920171292667, + "kl": 0.0128173828125, + "learning_rate": 4.3027801018271246e-07, + "loss": 0.0129, + "num_tokens": 113514134.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7470964789390564, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04057517183000437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056371983163172365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1448.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1081.0625, + "completions/mean_terminated_length": 1081.0625, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.6276569142285572, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.620031395005412, + "kl": 0.020477294921875, + "learning_rate": 4.298994046346203e-07, + "loss": -0.0135, + "num_tokens": 113561407.0, + "reward": 0.0, + "reward_std": 0.9411873817443848, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004583864271018778, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03450628332607171, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1183.5625, + "completions/mean_terminated_length": 1162.4666748046875, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.627906976744186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.124345664810864, + "kl": 0.0159454345703125, + "learning_rate": 4.295208905991894e-07, + "loss": 0.0185, + "num_tokens": 113599664.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9555768966674805, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005980488081129071, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0823932184302996, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 924.8125, + "completions/mean_terminated_length": 924.8125, + "completions/min_length": 533.0, + "completions/min_terminated_length": 533.0, + "epoch": 0.6281570392598149, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8134488481062934, + "kl": 0.0171051025390625, + "learning_rate": 4.291424683648348e-07, + "loss": -0.0429, + "num_tokens": 113639645.0, + "reward": 0.0, + "reward_std": 0.7816681861877441, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06796585890018624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09852787101069864, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1216.1875, + "completions/mean_terminated_length": 1175.6429443359375, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.6284071017754439, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6682672145768453, + "kl": 0.0146636962890625, + "learning_rate": 4.287641382199014e-07, + "loss": -0.0754, + "num_tokens": 113676216.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0596281290054321, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025367801401904012, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03664776014747686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1179.0, + "completions/mean_terminated_length": 1157.60009765625, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.6286571642910728, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.916169997852943, + "kl": 0.017486572265625, + "learning_rate": 4.2838590045266467e-07, + "loss": 0.0044, + "num_tokens": 113717512.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.034407615661621, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.032449670363558655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09868054180791731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1191.0, + "completions/mean_terminated_length": 1170.4000244140625, + "completions/min_length": 525.0, + "completions/min_terminated_length": 525.0, + "epoch": 0.6289072268067016, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3268633188945813, + "kl": 0.0177001953125, + "learning_rate": 4.2800775535132903e-07, + "loss": -0.0202, + "num_tokens": 113767656.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0414235591888428, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03800065556966067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06897229407275599, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 987.4375, + "completions/mean_terminated_length": 953.2667236328125, + "completions/min_length": 734.0, + "completions/min_terminated_length": 734.0, + "epoch": 0.6291572893223306, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7594432736926136, + "kl": 0.01495361328125, + "learning_rate": 4.276297032040281e-07, + "loss": -0.092, + "num_tokens": 113806999.0, + "reward": 0.0, + "reward_std": 0.9920101165771484, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016118540421911888, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1310470255391571, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1282.0, + "completions/mean_terminated_length": 1267.4666748046875, + "completions/min_length": 1013.0, + "completions/min_terminated_length": 1013.0, + "epoch": 0.6294073518379595, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9153356607615573, + "kl": 0.013275146484375, + "learning_rate": 4.272517442988258e-07, + "loss": 0.0224, + "num_tokens": 113846287.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6647114753723145, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06272880956054994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3351638098448217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 966.5, + "completions/mean_terminated_length": 930.9334106445312, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.6296574143535884, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.637262080815987, + "kl": 0.01025390625, + "learning_rate": 4.2687387892371383e-07, + "loss": -0.01, + "num_tokens": 113884911.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8836747407913208, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003996177349239365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09400601966821306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1184.875, + "completions/mean_terminated_length": 1184.875, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.6299074768692173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2049574723066856, + "kl": 0.020050048828125, + "learning_rate": 4.264961073666129e-07, + "loss": -0.0154, + "num_tokens": 113922901.0, + "reward": 0.0, + "reward_std": 0.7975951433181763, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03137573112029145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09526035422598968, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1213.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1003.8125, + "completions/mean_terminated_length": 1003.8125, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.6301575393848462, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2980094966107742, + "kl": 0.0112762451171875, + "learning_rate": 4.261184299153726e-07, + "loss": 0.0021, + "num_tokens": 113969194.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0324320793151855, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029715488364797142, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07285366753603172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1280.75, + "completions/mean_terminated_length": 1249.4285888671875, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.6304076019004751, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.314091277007164, + "kl": 0.0168304443359375, + "learning_rate": 4.2574084685777046e-07, + "loss": -0.0064, + "num_tokens": 114023934.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9408501386642456, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09496245223845094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11328395658075024, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1334.0, + "completions/mean_length": 1187.6875, + "completions/mean_terminated_length": 1166.86669921875, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.630657664416104, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.220657024195827, + "kl": 0.0155181884765625, + "learning_rate": 4.253633584815124e-07, + "loss": 0.0374, + "num_tokens": 114074465.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0635181665420532, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05127377838237515, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12224690082348048, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1135.0, + "completions/max_terminated_length": 1135.0, + "completions/mean_length": 992.0625, + "completions/mean_terminated_length": 992.0625, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.6309077269317329, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3650095891483804, + "kl": 0.01064300537109375, + "learning_rate": 4.2498596507423177e-07, + "loss": 0.0323, + "num_tokens": 114115346.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7444610595703125, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022463767945358495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07211952844795727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1313.0625, + "completions/mean_terminated_length": 1300.60009765625, + "completions/min_length": 1138.0, + "completions/min_terminated_length": 1138.0, + "epoch": 0.6311577894473619, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8394102579084506, + "kl": 0.01763916015625, + "learning_rate": 4.246086669234901e-07, + "loss": 0.0174, + "num_tokens": 114171763.0, + "reward": 0.0, + "reward_std": 0.7692309617996216, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06686878991939024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13428839728261596, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1087.25, + "completions/mean_terminated_length": 1087.25, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.6314078519629908, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2850256229530963, + "kl": 0.012054443359375, + "learning_rate": 4.242314643167758e-07, + "loss": -0.0232, + "num_tokens": 114213791.0, + "reward": 0.0, + "reward_std": 1.0559136867523193, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021495010441423885, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06632790840257811, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1469.4375, + "completions/mean_terminated_length": 1451.0999755859375, + "completions/min_length": 1358.0, + "completions/min_terminated_length": 1358.0, + "epoch": 0.6316579144786196, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0501770765708125, + "kl": 0.01043701171875, + "learning_rate": 4.238543575415048e-07, + "loss": -0.0044, + "num_tokens": 114257662.0, + "reward": 0.0, + "reward_std": 0.6015655994415283, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06033397022340779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10259165034804804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1284.0, + "completions/max_terminated_length": 1284.0, + "completions/mean_length": 1006.9375, + "completions/mean_terminated_length": 1006.9375, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.6319079769942486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6198750132898354, + "kl": 0.0211181640625, + "learning_rate": 4.234773468850198e-07, + "loss": -0.0243, + "num_tokens": 114305293.0, + "reward": 0.0, + "reward_std": 1.036360263824463, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034803239018633296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05456868171238814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1257.0, + "completions/mean_terminated_length": 1200.923095703125, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.6321580395098775, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2359085771585567, + "kl": 0.01556396484375, + "learning_rate": 4.2310043263459063e-07, + "loss": 0.0117, + "num_tokens": 114366373.0, + "reward": 0.0, + "reward_std": 0.9642835259437561, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05681254177420511, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05796976490808804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1307.6875, + "completions/mean_terminated_length": 1220.272705078125, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.6324081020255063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1243451936819127, + "kl": 0.013885498046875, + "learning_rate": 4.2272361507741325e-07, + "loss": -0.0066, + "num_tokens": 114419944.0, + "reward": 0.0, + "reward_std": 0.7071244716644287, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.003933724050091505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04325326749908963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1030.25, + "completions/mean_terminated_length": 1030.25, + "completions/min_length": 564.0, + "completions/min_terminated_length": 564.0, + "epoch": 0.6326581645411353, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6780478340801293, + "kl": 0.021942138671875, + "learning_rate": 4.223468945006101e-07, + "loss": 0.0517, + "num_tokens": 114460756.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8153190612792969, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04987348716936132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14057204277052138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1242.375, + "completions/mean_terminated_length": 1182.923095703125, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.6329082270567642, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1434433436641758, + "kl": 0.0155487060546875, + "learning_rate": 4.2197027119122985e-07, + "loss": -0.0322, + "num_tokens": 114508922.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.943358838558197, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0028420837393489697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022750672228471495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057185, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1124.3125, + "completions/mean_terminated_length": 1099.2667236328125, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.6331582895723931, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.739424344081914, + "kl": 0.01312255859375, + "learning_rate": 4.2159374543624705e-07, + "loss": 0.0185, + "num_tokens": 114549983.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0600740909576416, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043590075252831706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13792881850769748, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823627, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1142.875, + "completions/mean_terminated_length": 1119.0667724609375, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.633408352088022, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.17491099021234, + "kl": 0.0171051025390625, + "learning_rate": 4.2121731752256154e-07, + "loss": 0.0111, + "num_tokens": 114586933.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9826964735984802, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08661088290405328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.090767917843753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1388.6875, + "completions/mean_terminated_length": 1302.111083984375, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.6336584146036509, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1521458328787224, + "kl": 0.017578125, + "learning_rate": 4.2084098773699874e-07, + "loss": -0.0051, + "num_tokens": 114624112.0, + "reward": 0.0, + "reward_std": 1.0448672771453857, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0049501204942843345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07758705503380714, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1190.125, + "completions/mean_terminated_length": 1049.272705078125, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.6339084771192798, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3438535542403547, + "kl": 0.015716552734375, + "learning_rate": 4.2046475636631e-07, + "loss": -0.0623, + "num_tokens": 114676314.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0532866716384888, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006681916546988983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04252631048959739, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 957.75, + "completions/mean_terminated_length": 957.75, + "completions/min_length": 599.0, + "completions/min_terminated_length": 599.0, + "epoch": 0.6341585396349088, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4650399967381507, + "kl": 0.0148773193359375, + "learning_rate": 4.2008862369717067e-07, + "loss": -0.0616, + "num_tokens": 114710054.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0108177661895752, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030248745037916497, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06329307291833683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1180081604209045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1131.75, + "completions/mean_terminated_length": 1107.2000732421875, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.6344086021505376, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5961262078629983, + "kl": 0.0211181640625, + "learning_rate": 4.197125900161812e-07, + "loss": -0.1107, + "num_tokens": 114757322.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0282472372055054, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08512589926797713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07181269005354787, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1172.5, + "completions/mean_terminated_length": 1096.923095703125, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.6346586646661665, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7348386209132394, + "kl": 0.0152435302734375, + "learning_rate": 4.193366556098674e-07, + "loss": -0.0255, + "num_tokens": 114791786.0, + "reward": 0.0, + "reward_std": 0.9864680767059326, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03600487177922539, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1632462692965701, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1270.8125, + "completions/mean_terminated_length": 1166.6363525390625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.6349087271817955, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709745680634844, + "kl": 0.0160980224609375, + "learning_rate": 4.189608207646783e-07, + "loss": -0.0276, + "num_tokens": 114836279.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6192532181739807, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07066335987945127, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05054829570589991, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1071.9375, + "completions/mean_terminated_length": 1071.9375, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.6351587896974243, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.762585495790988, + "kl": 0.011993408203125, + "learning_rate": 4.185850857669876e-07, + "loss": 0.0219, + "num_tokens": 114881374.0, + "reward": 0.0, + "reward_std": 0.5702900886535645, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032809042846898416, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1048893814928695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1201.125, + "completions/mean_terminated_length": 1181.2000732421875, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.6354088522130532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.928915100112452, + "kl": 0.0163116455078125, + "learning_rate": 4.1820945090309303e-07, + "loss": 0.0211, + "num_tokens": 114927000.0, + "reward": 0.0, + "reward_std": 0.9822953939437866, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028965325788133482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10503238969351344, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1173.125, + "completions/mean_terminated_length": 1173.125, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.6356589147286822, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3874257330483446, + "kl": 0.020172119140625, + "learning_rate": 4.178339164592156e-07, + "loss": -0.0453, + "num_tokens": 114972986.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8649427890777588, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09199421186245682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08050837395594339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1040.5, + "completions/mean_terminated_length": 1040.5, + "completions/min_length": 473.0, + "completions/min_terminated_length": 473.0, + "epoch": 0.6359089772443111, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2288924166397512, + "kl": 0.0157470703125, + "learning_rate": 4.174584827215004e-07, + "loss": 0.0282, + "num_tokens": 115004954.0, + "reward": 0.0, + "reward_std": 0.7869876623153687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1160.5625, + "completions/mean_terminated_length": 1160.5625, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.63615903975994, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6242378192136573, + "kl": 0.0118865966796875, + "learning_rate": 4.170831499760152e-07, + "loss": -0.0469, + "num_tokens": 115048315.0, + "reward": 0.0, + "reward_std": 0.5633987784385681, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21462197669860367, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.168229808385343, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1275.75, + "completions/mean_terminated_length": 1173.8182373046875, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.6364091022755689, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.087163353971754, + "kl": 0.014007568359375, + "learning_rate": 4.1670791850875144e-07, + "loss": 0.0081, + "num_tokens": 115097831.0, + "reward": 0.0, + "reward_std": 0.9952833652496338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06949358644661219, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10136584598044499, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1223.125, + "completions/mean_terminated_length": 1159.2308349609375, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.6366591647911978, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.212572271527495, + "kl": 0.01776123046875, + "learning_rate": 4.163327886056226e-07, + "loss": 0.0012, + "num_tokens": 115139785.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0018807649612427, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1168.6875, + "completions/mean_terminated_length": 1092.2308349609375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.6369092273068268, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3948583358808175, + "kl": 0.01727294921875, + "learning_rate": 4.1595776055246544e-07, + "loss": -0.0583, + "num_tokens": 115183132.0, + "reward": 0.0, + "reward_std": 0.8522399663925171, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01241332240127597, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05644333689214007, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1486.0625, + "completions/mean_terminated_length": 1425.666748046875, + "completions/min_length": 1369.0, + "completions/min_terminated_length": 1369.0, + "epoch": 0.6371592898224556, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.959447273572795, + "kl": 0.0135650634765625, + "learning_rate": 4.1558283463503863e-07, + "loss": 0.0057, + "num_tokens": 115248565.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8127931952476501, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0069711740931594005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1605783902286321, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17758670287225065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1145.0, + "completions/max_terminated_length": 1145.0, + "completions/mean_length": 994.875, + "completions/mean_terminated_length": 994.875, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.6374093523380845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7300186597149727, + "kl": 0.01953125, + "learning_rate": 4.152080111390236e-07, + "loss": -0.0242, + "num_tokens": 115293083.0, + "reward": 0.0, + "reward_std": 0.5810505747795105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10103549601961581, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18761099653614177, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1334.0, + "completions/max_terminated_length": 1334.0, + "completions/mean_length": 1173.8125, + "completions/mean_terminated_length": 1173.8125, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.6376594148537135, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.919455943878928, + "kl": 0.012969970703125, + "learning_rate": 4.148332903500232e-07, + "loss": -0.0054, + "num_tokens": 115332064.0, + "reward": 0.0, + "reward_std": 0.7352269291877747, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024552529836496417, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04500451644832279, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1261.875, + "completions/mean_terminated_length": 1119.0, + "completions/min_length": 923.0, + "completions/min_terminated_length": 923.0, + "epoch": 0.6379094773693423, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0695476655391714, + "kl": 0.0163421630859375, + "learning_rate": 4.1445867255356204e-07, + "loss": -0.0197, + "num_tokens": 115378534.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5612432956695557, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06625089665079029, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07537174278565142, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1360.25, + "completions/mean_terminated_length": 1313.666748046875, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.6381595398849712, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0002014403031727, + "kl": 0.016632080078125, + "learning_rate": 4.1408415803508676e-07, + "loss": -0.0397, + "num_tokens": 115433274.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0065031051635742, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033999545258724805, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08105020885761678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1593970119149271, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1322.0, + "completions/mean_terminated_length": 1241.0909423828125, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.6384096024006002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2770492382241327, + "kl": 0.017974853515625, + "learning_rate": 4.13709747079965e-07, + "loss": 0.0078, + "num_tokens": 115486706.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7430444955825806, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020620309054052374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1472316781139842, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1299.75, + "completions/mean_terminated_length": 1208.727294921875, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.638659664916229, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9760715404670255, + "kl": 0.016204833984375, + "learning_rate": 4.1333543997348506e-07, + "loss": 0.0348, + "num_tokens": 115522222.0, + "reward": 0.0, + "reward_std": 0.9638098478317261, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.036017545581708964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04264710773352997, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1152.0, + "completions/mean_terminated_length": 1128.800048828125, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.6389097274318579, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.778901364237668, + "kl": 0.01861572265625, + "learning_rate": 4.1296123700085683e-07, + "loss": 0.0092, + "num_tokens": 115554998.0, + "reward": 0.0, + "reward_std": 0.5988049507141113, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.4018561225291392, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.256479219164469, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1251.25, + "completions/mean_terminated_length": 1168.3333740234375, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.6391597899474869, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.850124190853961, + "kl": 0.0171051025390625, + "learning_rate": 4.125871384472105e-07, + "loss": 0.0256, + "num_tokens": 115598698.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4954206943511963, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06287022690295979, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18127107313185573, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620107, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1431.0, + "completions/mean_length": 1325.5, + "completions/mean_terminated_length": 1267.3333740234375, + "completions/min_length": 1102.0, + "completions/min_terminated_length": 1102.0, + "epoch": 0.6394098524631158, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5332880618141305, + "kl": 0.01348876953125, + "learning_rate": 4.122131445975966e-07, + "loss": -0.0164, + "num_tokens": 115647458.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9328893423080444, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12773643931661627, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07236872050211984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1140.0, + "completions/mean_length": 899.9375, + "completions/mean_terminated_length": 859.933349609375, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.6396599149787446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8656333002425614, + "kl": 0.0140228271484375, + "learning_rate": 4.1183925573698596e-07, + "loss": -0.0703, + "num_tokens": 115681809.0, + "reward": 0.0, + "reward_std": 0.6372206211090088, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14124219204819152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17042755802128096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1049.4375, + "completions/mean_terminated_length": 1049.4375, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.6399099774943736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.105188703110228, + "kl": 0.014739990234375, + "learning_rate": 4.1146547215026984e-07, + "loss": 0.0048, + "num_tokens": 115715552.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8274568319320679, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1480223050428887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08681008634860159, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1401.125, + "completions/mean_terminated_length": 1324.2222900390625, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.6401600400100025, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.724720472365392, + "kl": 0.012542724609375, + "learning_rate": 4.1109179412225857e-07, + "loss": -0.0147, + "num_tokens": 115768554.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.061547875404358, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007075767637899279, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06795075873938874, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1148.4375, + "completions/mean_terminated_length": 1148.4375, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.6404101025256314, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5178226259876118, + "kl": 0.020721435546875, + "learning_rate": 4.107182219376825e-07, + "loss": 0.001, + "num_tokens": 115823753.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0467597246170044, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1840301443147889, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18863959867228416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1323.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1216.1875, + "completions/mean_terminated_length": 1216.1875, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.6406601650412603, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.334783032815991, + "kl": 0.007762908935546875, + "learning_rate": 4.103447558811911e-07, + "loss": 0.0161, + "num_tokens": 115865148.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9075245261192322, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053317022511962894, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14642378237621118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1390.0625, + "completions/mean_terminated_length": 1206.8333740234375, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.6409102275568892, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6703962405007893, + "kl": 0.013824462890625, + "learning_rate": 4.0997139623735324e-07, + "loss": 0.0309, + "num_tokens": 115924405.0, + "reward": 0.0, + "reward_std": 0.6121705174446106, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13657787162310148, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12151209646197153, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0824396524513313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1244.5, + "completions/mean_terminated_length": 1227.4666748046875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.6411602900725182, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9654682665278265, + "kl": 0.01348876953125, + "learning_rate": 4.0959814329065655e-07, + "loss": -0.0245, + "num_tokens": 115974701.0, + "reward": 0.0, + "reward_std": 1.015746831893921, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3147854558495078, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19170871350233873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1023.0, + "completions/mean_terminated_length": 912.923095703125, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.641410352588147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7230407498454277, + "kl": 0.02001953125, + "learning_rate": 4.0922499732550717e-07, + "loss": 0.0076, + "num_tokens": 116021397.0, + "reward": 0.0, + "reward_std": 0.6693112254142761, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07297228695029498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08207316884323852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1283.6875, + "completions/mean_terminated_length": 1067.375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.6416604151037759, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.941702579911271, + "kl": 0.017913818359375, + "learning_rate": 4.088519586262302e-07, + "loss": -0.0032, + "num_tokens": 116059456.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8263548612594604, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030972451593820602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10815757305509037, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 1353.125, + "completions/mean_terminated_length": 1108.3333740234375, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.6419104776194049, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0769797348495027, + "kl": 0.016876220703125, + "learning_rate": 4.0847902747706863e-07, + "loss": -0.0197, + "num_tokens": 116114482.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.020566463470459, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05943247174563197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10551365743927646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1171.0625, + "completions/mean_terminated_length": 1171.0625, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.6421605401350338, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5317871755655865, + "kl": 0.014556884765625, + "learning_rate": 4.081062041621837e-07, + "loss": -0.0381, + "num_tokens": 116160219.0, + "reward": 0.0, + "reward_std": 0.8831371068954468, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041153093380921164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041700668841236424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1195.5625, + "completions/mean_terminated_length": 1094.0833740234375, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.6424106026506626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7047116707936443, + "kl": 0.022796630859375, + "learning_rate": 4.0773348896565384e-07, + "loss": -0.0067, + "num_tokens": 116203236.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.43821969628334045, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03144209352899624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09822341821455015, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1211.1875, + "completions/mean_terminated_length": 1191.933349609375, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.6426606651662916, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.933548465649316, + "kl": 0.021270751953125, + "learning_rate": 4.0736088217147635e-07, + "loss": -0.0198, + "num_tokens": 116252151.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9770416617393494, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06004109221728997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11367764602707472, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1144.5625, + "completions/mean_terminated_length": 1062.5384521484375, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.6429107276819205, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7778764175573296, + "kl": 0.0146636962890625, + "learning_rate": 4.0698838406356493e-07, + "loss": -0.0802, + "num_tokens": 116300208.0, + "reward": 0.0, + "reward_std": 0.9763317704200745, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029657922095394553, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06366187715229865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1148.6875, + "completions/mean_terminated_length": 1148.6875, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.6431607901975493, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7206894535603996, + "kl": 0.02197265625, + "learning_rate": 4.0661599492575047e-07, + "loss": -0.0025, + "num_tokens": 116345411.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9293826222419739, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12025305200875727, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09936938721095302, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1248.5625, + "completions/mean_terminated_length": 1134.272705078125, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.6434108527131783, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.25202883325891, + "kl": 0.019683837890625, + "learning_rate": 4.062437150417818e-07, + "loss": 0.0142, + "num_tokens": 116396372.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0377016067504883, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005319057365506314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06596238985776215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1282.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 997.9375, + "completions/mean_terminated_length": 997.9375, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.6436609152288072, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.553820708362027, + "kl": 0.0183563232421875, + "learning_rate": 4.0587154469532316e-07, + "loss": -0.035, + "num_tokens": 116444819.0, + "reward": 0.0, + "reward_std": 0.7521826028823853, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03790998218677638, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16083138335322417, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1340.0, + "completions/mean_terminated_length": 1286.666748046875, + "completions/min_length": 1141.0, + "completions/min_terminated_length": 1141.0, + "epoch": 0.6439109777444361, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6270777137289327, + "kl": 0.0123138427734375, + "learning_rate": 4.0549948416995616e-07, + "loss": -0.0169, + "num_tokens": 116494835.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8791605830192566, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.331112800691878, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2875791865734574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1234.125, + "completions/mean_terminated_length": 1113.272705078125, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.644161040260065, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.960609183521779, + "kl": 0.015045166015625, + "learning_rate": 4.0512753374917875e-07, + "loss": -0.0549, + "num_tokens": 116544885.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9612252712249756, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030468574560513446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061996142185221675, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1281.0625, + "completions/mean_terminated_length": 1249.7857666015625, + "completions/min_length": 1057.0, + "completions/min_terminated_length": 1057.0, + "epoch": 0.6444111027756939, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.405694896231441, + "kl": 0.0142059326171875, + "learning_rate": 4.047556937164046e-07, + "loss": -0.0236, + "num_tokens": 116598582.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8950363993644714, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1179376315043699, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07733622638898142, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1140.5, + "completions/mean_terminated_length": 1116.533447265625, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.6446611652913228, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.088013038045507, + "kl": 0.0132904052734375, + "learning_rate": 4.0438396435496356e-07, + "loss": 0.0105, + "num_tokens": 116643446.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.972887396812439, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0030764131107583227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06500625800338271, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15147423690002354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1223.375, + "completions/mean_terminated_length": 1008.2222290039062, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.6449112278069518, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9714641359514373, + "kl": 0.0201568603515625, + "learning_rate": 4.040123459481009e-07, + "loss": -0.0383, + "num_tokens": 116699644.0, + "reward": 0.0, + "reward_std": 0.9883487820625305, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06410312184923374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0837500454137008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1394.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1003.5, + "completions/mean_terminated_length": 1003.5, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.6451612903225806, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9297090820740004, + "kl": 0.0130157470703125, + "learning_rate": 4.036408387789776e-07, + "loss": 0.0018, + "num_tokens": 116734492.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9960783123970032, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08288732907822556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1513656774921026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1171.6875, + "completions/mean_terminated_length": 1022.45458984375, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.6454113528382096, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9665828599603716, + "kl": 0.005893707275390625, + "learning_rate": 4.032694431306698e-07, + "loss": -0.0193, + "num_tokens": 116780111.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8624330759048462, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06938369788274412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18238695867327182, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1124.0, + "completions/mean_terminated_length": 1098.933349609375, + "completions/min_length": 578.0, + "completions/min_terminated_length": 578.0, + "epoch": 0.6456614153538385, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.31824986396329, + "kl": 0.0164337158203125, + "learning_rate": 4.028981592861687e-07, + "loss": 0.0291, + "num_tokens": 116831399.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6936956644058228, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04610248619786078, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10719711939790513, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1252.6875, + "completions/mean_terminated_length": 1252.6875, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.6459114778694673, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9458021836535546, + "kl": 0.0258636474609375, + "learning_rate": 4.0252698752837986e-07, + "loss": 0.0198, + "num_tokens": 116870938.0, + "reward": 0.0, + "reward_std": 0.7310017943382263, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2015660916839365, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24137353360707414, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1295.5, + "completions/mean_terminated_length": 1227.3333740234375, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.6461615403850963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1216754623588963, + "kl": 0.0142059326171875, + "learning_rate": 4.0215592814012434e-07, + "loss": 0.0193, + "num_tokens": 116914914.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0178442001342773, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02333375782385983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08603070301779407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1026.0625, + "completions/mean_terminated_length": 994.4667358398438, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.6464116029007252, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.748401192707013, + "kl": 0.0203857421875, + "learning_rate": 4.0178498140413674e-07, + "loss": -0.04, + "num_tokens": 116948523.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9409335255622864, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009726541716701231, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03623491305496995, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353542, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1381.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 971.0, + "completions/mean_terminated_length": 971.0, + "completions/min_length": 519.0, + "completions/min_terminated_length": 519.0, + "epoch": 0.6466616654163541, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.144874031837967, + "kl": 0.02484130859375, + "learning_rate": 4.0141414760306623e-07, + "loss": -0.0231, + "num_tokens": 116985091.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9525685906410217, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014379183853557324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06498888240823489, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1295.0, + "completions/mean_terminated_length": 1226.666748046875, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.646911727931983, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5577058542818616, + "kl": 0.01483154296875, + "learning_rate": 4.010434270194759e-07, + "loss": 0.0171, + "num_tokens": 117026243.0, + "reward": 0.0, + "reward_std": 0.7546505928039551, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10418489694795013, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07859659675758318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1232.375, + "completions/mean_terminated_length": 1143.166748046875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.6471617904476119, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2333253702887426, + "kl": 0.0178375244140625, + "learning_rate": 4.0067281993584256e-07, + "loss": -0.0057, + "num_tokens": 117078673.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9449419975280762, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04693343068431377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14633087038702997, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 928.875, + "completions/mean_terminated_length": 890.800048828125, + "completions/min_length": 604.0, + "completions/min_terminated_length": 604.0, + "epoch": 0.6474118529632408, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8712278013361914, + "kl": 0.0159149169921875, + "learning_rate": 4.003023266345562e-07, + "loss": -0.049, + "num_tokens": 117116703.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7258220314979553, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033792851438693636, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07393301085985911, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1163.125, + "completions/mean_terminated_length": 1115.0, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.6476619154788698, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7978035276481954, + "kl": 0.0144195556640625, + "learning_rate": 3.999319473979205e-07, + "loss": -0.0193, + "num_tokens": 117151169.0, + "reward": -5.587935447692871e-09, + "reward_std": 1.0529167652130127, + "rewards/wordcountpos_reward_GEOBench/mean": -5.587935447692871e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0085464601885747, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055088124880204536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1107.25, + "completions/mean_terminated_length": 1016.6154174804688, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.6479119779944986, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7770712186963378, + "kl": 0.0143280029296875, + "learning_rate": 3.9956168250815235e-07, + "loss": -0.0343, + "num_tokens": 117190821.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.046122431755066, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010028023030872581, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05952441921681068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994404, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1330.875, + "completions/mean_terminated_length": 1049.0, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.6481620405101275, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5240295903785235, + "kl": 0.00982666015625, + "learning_rate": 3.991915322473811e-07, + "loss": -0.0032, + "num_tokens": 117237643.0, + "reward": 0.0, + "reward_std": 1.0177603960037231, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022796314133765733, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09616126394514385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 942.4375, + "completions/mean_terminated_length": 942.4375, + "completions/min_length": 602.0, + "completions/min_terminated_length": 602.0, + "epoch": 0.6484121030257565, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8004160169460843, + "kl": 0.01690673828125, + "learning_rate": 3.9882149689764877e-07, + "loss": 0.0044, + "num_tokens": 117268658.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7105497121810913, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09217302596767635, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1163806910642427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1429.5625, + "completions/mean_terminated_length": 1359.125, + "completions/min_length": 1159.0, + "completions/min_terminated_length": 1159.0, + "epoch": 0.6486621655413853, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4528214923070943, + "kl": 0.0107421875, + "learning_rate": 3.9845157674091044e-07, + "loss": 0.0023, + "num_tokens": 117319859.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0567948818206787, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024989133451566906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04958814739256882, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1241.4375, + "completions/mean_terminated_length": 1086.300048828125, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.6489122280570142, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7175821291311295, + "kl": 0.011688232421875, + "learning_rate": 3.980817720590327e-07, + "loss": -0.0319, + "num_tokens": 117365794.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4925883412361145, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02678553469351088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15989325551539402, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1480.3125, + "completions/mean_terminated_length": 1421.25, + "completions/min_length": 1383.0, + "completions/min_terminated_length": 1383.0, + "epoch": 0.6491622905726432, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.433252513702653, + "kl": 0.011322021484375, + "learning_rate": 3.9771208313379454e-07, + "loss": 0.0013, + "num_tokens": 117420311.0, + "reward": 0.0, + "reward_std": 0.8392419815063477, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07584084762127169, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1248858667401481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1084.5625, + "completions/mean_terminated_length": 1084.5625, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.649412353088272, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7427765999723617, + "kl": 0.0142364501953125, + "learning_rate": 3.9734251024688637e-07, + "loss": 0.0612, + "num_tokens": 117472408.0, + "reward": 0.0, + "reward_std": 0.8324097990989685, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2602312822783914, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16576429559902217, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1206.5, + "completions/mean_terminated_length": 1073.0909423828125, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.6496624156039009, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4915714767311674, + "kl": 0.017730712890625, + "learning_rate": 3.9697305367991075e-07, + "loss": 0.0008, + "num_tokens": 117518392.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0377135276794434, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032197554500003514, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07444550712148791, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 968.5, + "completions/mean_terminated_length": 968.5, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.6499124781195299, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.710152858328446, + "kl": 0.0158538818359375, + "learning_rate": 3.966037137143812e-07, + "loss": 0.0137, + "num_tokens": 117549896.0, + "reward": 0.0, + "reward_std": 0.9098241329193115, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03736907983477276, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05143178134393956, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1156.0625, + "completions/mean_terminated_length": 1133.1334228515625, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.6501625406351588, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.671105584592867, + "kl": 0.02130126953125, + "learning_rate": 3.9623449063172234e-07, + "loss": 0.0273, + "num_tokens": 117596393.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.900663435459137, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004573060209078027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052329403413158046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1337.3125, + "completions/mean_terminated_length": 1210.77783203125, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.6504126031507877, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5565621268624596, + "kl": 0.01361083984375, + "learning_rate": 3.9586538471327003e-07, + "loss": -0.0262, + "num_tokens": 117653062.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8189833760261536, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05408053722177289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16610056024236397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1316.0625, + "completions/mean_terminated_length": 1289.7857666015625, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.6506626656664166, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.067352721474509, + "kl": 0.0118408203125, + "learning_rate": 3.9549639624027076e-07, + "loss": -0.0114, + "num_tokens": 117695951.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.991165280342102, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003048147886707997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.100815928030495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1115.0, + "completions/mean_length": 1012.125, + "completions/mean_terminated_length": 979.6000366210938, + "completions/min_length": 523.0, + "completions/min_terminated_length": 523.0, + "epoch": 0.6509127281820455, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.414888443332429, + "kl": 0.0175933837890625, + "learning_rate": 3.951275254938816e-07, + "loss": -0.0328, + "num_tokens": 117736169.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0238022804260254, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1038.5, + "completions/mean_terminated_length": 1038.5, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.6511627906976745, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0325722966501627, + "kl": 0.0145111083984375, + "learning_rate": 3.9475877275516933e-07, + "loss": -0.0002, + "num_tokens": 117791345.0, + "reward": 0.0, + "reward_std": 0.836898922920227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22428551425476437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13743699253104769, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1046156988431681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1063.875, + "completions/mean_terminated_length": 1063.875, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.6514128532133033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.497030341409155, + "kl": 0.01885986328125, + "learning_rate": 3.943901383051117e-07, + "loss": -0.0043, + "num_tokens": 117842543.0, + "reward": 0.0, + "reward_std": 0.699130117893219, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04063257279627858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11517693706733156, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16147468555186623, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1095.6875, + "completions/mean_terminated_length": 1002.3846435546875, + "completions/min_length": 518.0, + "completions/min_terminated_length": 518.0, + "epoch": 0.6516629157289322, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.245801515676401, + "kl": 0.01953125, + "learning_rate": 3.940216224245958e-07, + "loss": -0.0184, + "num_tokens": 117882178.0, + "reward": 0.0, + "reward_std": 0.8272777795791626, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17208995371918287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0517235456633969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1200.3125, + "completions/mean_terminated_length": 1200.3125, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.6519129782445612, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.440122969669539, + "kl": 0.01531982421875, + "learning_rate": 3.936532253944185e-07, + "loss": -0.0101, + "num_tokens": 117928175.0, + "reward": 0.0, + "reward_std": 1.0122153759002686, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053136627316585826, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11003739252986716, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1287403358472941, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1149.5, + "completions/mean_terminated_length": 1099.4285888671875, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.65216304076019, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.274496955535228, + "kl": 0.018768310546875, + "learning_rate": 3.932849474952857e-07, + "loss": -0.107, + "num_tokens": 117980295.0, + "reward": 0.0, + "reward_std": 0.937747597694397, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017242545987517906, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03833580512173297, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1303.875, + "completions/mean_terminated_length": 1258.615478515625, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.6524131032758189, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7317197351454525, + "kl": 0.017730712890625, + "learning_rate": 3.9291678900781367e-07, + "loss": -0.0281, + "num_tokens": 118036861.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5043466091156006, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09463854547523112, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09828730689068801, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1384.5625, + "completions/mean_terminated_length": 1294.77783203125, + "completions/min_length": 1157.0, + "completions/min_terminated_length": 1157.0, + "epoch": 0.6526631657914479, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2980132545843572, + "kl": 0.0191650390625, + "learning_rate": 3.925487502125264e-07, + "loss": 0.0023, + "num_tokens": 118096334.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0570056438446045, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02465472922334871, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10680621265169601, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1300.9375, + "completions/mean_terminated_length": 1181.5, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.6529132283070768, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5605273646232756, + "kl": 0.0137786865234375, + "learning_rate": 3.9218083138985714e-07, + "loss": 0.051, + "num_tokens": 118147357.0, + "reward": 0.0, + "reward_std": 0.9947801828384399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.055900611732755234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06673063397537037, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1250.75, + "completions/mean_terminated_length": 1193.2308349609375, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.6531632908227056, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.051031598957039, + "kl": 0.01666259765625, + "learning_rate": 3.9181303282014823e-07, + "loss": -0.0502, + "num_tokens": 118203969.0, + "reward": 0.0, + "reward_std": 0.8931825160980225, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019440879860876337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20782771706398656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1290.0, + "completions/mean_terminated_length": 1290.0, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.6534133533383346, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.461276783401956, + "kl": 0.019439697265625, + "learning_rate": 3.914453547836497e-07, + "loss": -0.0126, + "num_tokens": 118254233.0, + "reward": 0.0, + "reward_std": 0.9860749840736389, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05659505549819309, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05806635391753169, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1293.5625, + "completions/mean_terminated_length": 1264.071533203125, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.6536634158539635, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8032575952818957, + "kl": 0.019989013671875, + "learning_rate": 3.9107779756051996e-07, + "loss": 0.0009, + "num_tokens": 118295962.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8657025098800659, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04819448873206462, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05676567762483398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1360.9375, + "completions/mean_terminated_length": 1341.071533203125, + "completions/min_length": 1176.0, + "completions/min_terminated_length": 1176.0, + "epoch": 0.6539134783695923, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4650959744844725, + "kl": 0.013031005859375, + "learning_rate": 3.907103614308255e-07, + "loss": -0.0086, + "num_tokens": 118335881.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4528198540210724, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006678234472878096, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17094681698879793, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11080513425729777, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1094.3125, + "completions/mean_terminated_length": 1000.6923217773438, + "completions/min_length": 526.0, + "completions/min_terminated_length": 526.0, + "epoch": 0.6541635408852213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2057499267370146, + "kl": 0.0193634033203125, + "learning_rate": 3.903430466745403e-07, + "loss": -0.0079, + "num_tokens": 118378046.0, + "reward": 0.0, + "reward_std": 0.8781936168670654, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.21351573434394794, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12301071861313821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1458.625, + "completions/mean_terminated_length": 1389.666748046875, + "completions/min_length": 1218.0, + "completions/min_terminated_length": 1218.0, + "epoch": 0.6544136034008502, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7967009890861148, + "kl": 0.0126800537109375, + "learning_rate": 3.899758535715463e-07, + "loss": -0.0125, + "num_tokens": 118436104.0, + "reward": 0.0, + "reward_std": 0.5614475607872009, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03370325246839316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08760179660888091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445937, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1160.9375, + "completions/mean_terminated_length": 1138.3333740234375, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.6546636659164791, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6539586150546546, + "kl": 0.017242431640625, + "learning_rate": 3.89608782401632e-07, + "loss": 0.0221, + "num_tokens": 118481671.0, + "reward": 0.0, + "reward_std": 0.7902324795722961, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06446440136434572, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11716221974802864, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16666666666666669, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1210.875, + "completions/mean_terminated_length": 1114.5, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.654913728432108, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.128491067132597, + "kl": 0.0189208984375, + "learning_rate": 3.892418334444937e-07, + "loss": -0.048, + "num_tokens": 118533997.0, + "reward": 0.0, + "reward_std": 0.9831488132476807, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024468727628863, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09764316084272918, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1057.3125, + "completions/mean_terminated_length": 1057.3125, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.6551637909477369, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.534647318379648, + "kl": 0.0151519775390625, + "learning_rate": 3.888750069797344e-07, + "loss": -0.0093, + "num_tokens": 118570538.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8483592867851257, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009310049904093697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04645429569316657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13662601021279466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1429.5, + "completions/mean_terminated_length": 1359.0, + "completions/min_length": 1140.0, + "completions/min_terminated_length": 1140.0, + "epoch": 0.6554138534633659, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2934787239495984, + "kl": 0.00946044921875, + "learning_rate": 3.8850830328686344e-07, + "loss": -0.0132, + "num_tokens": 118614370.0, + "reward": 0.0, + "reward_std": 0.4947322607040405, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006482298225683355, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11144372943772192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1255.5, + "completions/mean_terminated_length": 1199.0770263671875, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.6556639159789948, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.811361298625861, + "kl": 0.0191650390625, + "learning_rate": 3.881417226452969e-07, + "loss": -0.0525, + "num_tokens": 118659394.0, + "reward": 0.0, + "reward_std": 1.0555965900421143, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0018047005038480286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06432497568864513, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081414, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1120.75, + "completions/mean_terminated_length": 1033.2308349609375, + "completions/min_length": 640.0, + "completions/min_terminated_length": 640.0, + "epoch": 0.6559139784946236, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3681186988304956, + "kl": 0.028045654296875, + "learning_rate": 3.877752653343571e-07, + "loss": -0.0261, + "num_tokens": 118700718.0, + "reward": 0.0, + "reward_std": 0.9323616623878479, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05168632927115789, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0675341479626913, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1268.0, + "completions/max_terminated_length": 1268.0, + "completions/mean_length": 871.6875, + "completions/mean_terminated_length": 871.6875, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.6561640410102526, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8933037741347032, + "kl": 0.00860595703125, + "learning_rate": 3.8740893163327247e-07, + "loss": 0.0451, + "num_tokens": 118742553.0, + "reward": 0.0, + "reward_std": 1.0157837867736816, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03721505755378061, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07115713546732924, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1242.0, + "completions/mean_length": 1188.0625, + "completions/mean_terminated_length": 1084.0833740234375, + "completions/min_length": 464.0, + "completions/min_terminated_length": 464.0, + "epoch": 0.6564141035258815, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0746470177829495, + "kl": 0.015228271484375, + "learning_rate": 3.870427218211766e-07, + "loss": 0.0172, + "num_tokens": 118792314.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.046790361404419, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0873236886723748, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10424230564967261, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1636957430610103, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1089.3125, + "completions/mean_terminated_length": 1089.3125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.6566641660415103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8431157493588306, + "kl": 0.0125732421875, + "learning_rate": 3.8667663617710987e-07, + "loss": 0.0045, + "num_tokens": 118834535.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9623502492904663, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08403184527044964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056361655895701544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1067.5625, + "completions/mean_terminated_length": 1067.5625, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.6569142285571393, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4726001949022236, + "kl": 0.019287109375, + "learning_rate": 3.863106749800169e-07, + "loss": 0.0225, + "num_tokens": 118877440.0, + "reward": 0.0, + "reward_std": 0.2628893256187439, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07576531627218454, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08854097706048751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1188.3125, + "completions/mean_terminated_length": 1188.3125, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.6571642910727682, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6326575178931337, + "kl": 0.012725830078125, + "learning_rate": 3.8594483850874826e-07, + "loss": -0.0061, + "num_tokens": 118913853.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5638790726661682, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012415907454255814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053694031688719106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1211.25, + "completions/mean_terminated_length": 1115.0, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.6574143535883971, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2692510745551426, + "kl": 0.017913818359375, + "learning_rate": 3.8557912704205873e-07, + "loss": -0.0339, + "num_tokens": 118951617.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0441104173660278, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023695464909514814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07820710965576805, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1258305739211792, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1421.5625, + "completions/mean_terminated_length": 1360.5555419921875, + "completions/min_length": 1176.0, + "completions/min_terminated_length": 1176.0, + "epoch": 0.657664416104026, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6412057203123482, + "kl": 0.0125732421875, + "learning_rate": 3.852135408586088e-07, + "loss": 0.0107, + "num_tokens": 119011674.0, + "reward": 0.0, + "reward_std": 0.6745460629463196, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11264097998219003, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.188528986517095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1185.0625, + "completions/mean_terminated_length": 1140.071533203125, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.6579144786196549, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709435483474456, + "kl": 0.01544189453125, + "learning_rate": 3.848480802369627e-07, + "loss": -0.0025, + "num_tokens": 119057579.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0415176153182983, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05858825325341162, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08466958907514924, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1340.125, + "completions/mean_terminated_length": 1180.25, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.6581645411352838, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7440531586046655, + "kl": 0.0146942138671875, + "learning_rate": 3.8448274545558923e-07, + "loss": 0.0056, + "num_tokens": 119111509.0, + "reward": 0.0, + "reward_std": 0.9738819599151611, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19515230738926587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11761010630823682, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0825518916489187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1376.125, + "completions/mean_terminated_length": 1279.77783203125, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.6584146036509128, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.558497670093134, + "kl": 0.011016845703125, + "learning_rate": 3.8411753679286163e-07, + "loss": -0.0115, + "num_tokens": 119170423.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9249062538146973, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.074919530628666, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07885317859004269, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1054.5, + "completions/mean_terminated_length": 1024.800048828125, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.6586646661665416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3530493026190342, + "kl": 0.01959228515625, + "learning_rate": 3.837524545270566e-07, + "loss": -0.0951, + "num_tokens": 119204479.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7462056875228882, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004525604665049364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07199266057331047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 1285.25, + "completions/mean_terminated_length": 1070.5, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.6589147286821705, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0994184248199423, + "kl": 0.00655364990234375, + "learning_rate": 3.833874989363547e-07, + "loss": -0.0205, + "num_tokens": 119255259.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8264853358268738, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06830764922792648, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03294967215623002, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1137.875, + "completions/mean_terminated_length": 1113.7333984375, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.6591647911977995, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9004354966881696, + "kl": 0.0126495361328125, + "learning_rate": 3.8302267029883973e-07, + "loss": -0.0155, + "num_tokens": 119297569.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8486278653144836, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03465042198993357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06565150744870149, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1403.8125, + "completions/mean_terminated_length": 1329.0, + "completions/min_length": 1173.0, + "completions/min_terminated_length": 1173.0, + "epoch": 0.6594148537134283, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.317422783635642, + "kl": 0.0113983154296875, + "learning_rate": 3.8265796889249935e-07, + "loss": -0.0142, + "num_tokens": 119338366.0, + "reward": 0.0, + "reward_std": 0.6504414081573486, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11413788697556386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0826849161949034, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1254.625, + "completions/mean_terminated_length": 1238.2667236328125, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.6596649162290573, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.894011807145749, + "kl": 0.01739501953125, + "learning_rate": 3.8229339499522374e-07, + "loss": -0.044, + "num_tokens": 119376168.0, + "reward": 2.0489096641540527e-08, + "reward_std": 1.067887544631958, + "rewards/wordcountpos_reward_GEOBench/mean": 2.0489096641540527e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013655706175133626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07300012183611214, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13655822255780922, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1238.0, + "completions/mean_length": 896.5625, + "completions/mean_terminated_length": 856.3333740234375, + "completions/min_length": 528.0, + "completions/min_terminated_length": 528.0, + "epoch": 0.6599149787446862, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.680285500777866, + "kl": 0.02972412109375, + "learning_rate": 3.819289488848061e-07, + "loss": 0.0325, + "num_tokens": 119408433.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8653352856636047, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03715197767997294, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10791640712149898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1140.375, + "completions/mean_terminated_length": 1140.375, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.660165041260315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3375709649414755, + "kl": 0.020050048828125, + "learning_rate": 3.815646308389422e-07, + "loss": -0.0195, + "num_tokens": 119451775.0, + "reward": 0.0, + "reward_std": 0.681147038936615, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.032557385828699575, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14617710539640963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1203.875, + "completions/mean_terminated_length": 1135.5384521484375, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.660415103775944, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.928731839604882, + "kl": 0.02008056640625, + "learning_rate": 3.812004411352304e-07, + "loss": -0.0083, + "num_tokens": 119498685.0, + "reward": 0.0, + "reward_std": 0.9441503882408142, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02976885266184326, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1116099039266253, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1262.1875, + "completions/mean_terminated_length": 1207.3077392578125, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.6606651662915729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.252740002325529, + "kl": 0.0145263671875, + "learning_rate": 3.8083638005117123e-07, + "loss": -0.0543, + "num_tokens": 119556192.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.014634609222412, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03231372876504832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16611702050776506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1057600358603626, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1215.5, + "completions/mean_terminated_length": 1120.666748046875, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.6609152288072018, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4157688121993344, + "kl": 0.009796142578125, + "learning_rate": 3.804724478641667e-07, + "loss": 0.0179, + "num_tokens": 119594888.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8881604671478271, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.044488078087206145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.123003897998386, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1223.5, + "completions/mean_terminated_length": 1159.6923828125, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.6611652913228308, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.776751717592287, + "kl": 0.01556396484375, + "learning_rate": 3.801086448515215e-07, + "loss": 0.0193, + "num_tokens": 119653840.0, + "reward": 0.0, + "reward_std": 0.7579807639122009, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07499958257998073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0980059349458919, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0925962962222252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1303.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1090.625, + "completions/mean_terminated_length": 1090.625, + "completions/min_length": 817.0, + "completions/min_terminated_length": 817.0, + "epoch": 0.6614153538384596, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8923206469805858, + "kl": 0.0134735107421875, + "learning_rate": 3.7974497129044126e-07, + "loss": -0.0225, + "num_tokens": 119692058.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8029853105545044, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002984273372469183, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09765086884034416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0665276327996565, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1096.9375, + "completions/mean_terminated_length": 1096.9375, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.6616654163540885, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8234684126132255, + "kl": 0.0135345458984375, + "learning_rate": 3.793814274580329e-07, + "loss": -0.0437, + "num_tokens": 119733241.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9564077854156494, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010276730823644898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.019985519888173958, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1151.75, + "completions/mean_terminated_length": 1102.0, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.6619154788697175, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.582977005914331, + "kl": 0.020721435546875, + "learning_rate": 3.7901801363130526e-07, + "loss": 0.0313, + "num_tokens": 119782277.0, + "reward": 0.0, + "reward_std": 0.7967680096626282, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07062111258535837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07610622454777213, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1408.75, + "completions/mean_terminated_length": 1256.666748046875, + "completions/min_length": 1118.0, + "completions/min_terminated_length": 1118.0, + "epoch": 0.6621655413853463, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.631426394849482, + "kl": 0.0120391845703125, + "learning_rate": 3.786547300871673e-07, + "loss": 0.0027, + "num_tokens": 119845433.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5636178255081177, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04294814697310382, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09756802749582899, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1170.125, + "completions/mean_terminated_length": 1094.0, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.6624156039009752, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1567025540203755, + "kl": 0.0166473388671875, + "learning_rate": 3.78291577102429e-07, + "loss": 0.0304, + "num_tokens": 119891699.0, + "reward": 0.0, + "reward_std": 0.7401771545410156, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010690100082849723, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.042976039890093395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 1143.125, + "completions/mean_terminated_length": 1143.125, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.6626656664166042, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.47771223490276, + "kl": 0.023162841796875, + "learning_rate": 3.779285549538006e-07, + "loss": -0.0593, + "num_tokens": 119941517.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.055275559425354, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16237033841673257, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16671115729583558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1012.1875, + "completions/mean_terminated_length": 979.666748046875, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.662915728932233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.684592762977475, + "kl": 0.013519287109375, + "learning_rate": 3.7756566391789345e-07, + "loss": -0.0263, + "num_tokens": 119971688.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.29914185404777527, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15877014229402717, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1523520849216618, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1600347184554374, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1369.625, + "completions/mean_terminated_length": 1310.3636474609375, + "completions/min_length": 1087.0, + "completions/min_terminated_length": 1087.0, + "epoch": 0.6631657914478619, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.799928831312321, + "kl": 0.0132904052734375, + "learning_rate": 3.7720290427121817e-07, + "loss": 0.0274, + "num_tokens": 120018898.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8085658550262451, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00868085148285207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044205199132272666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1123.375, + "completions/mean_terminated_length": 1098.2667236328125, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.6634158539634909, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.559102500365913, + "kl": 0.019561767578125, + "learning_rate": 3.7684027629018547e-07, + "loss": 0.0073, + "num_tokens": 120059088.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.023788571357727, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12836807645795842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0991814925484317, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1315.1875, + "completions/mean_terminated_length": 1253.5833740234375, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.6636659164791198, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.166462676977729, + "kl": 0.0169525146484375, + "learning_rate": 3.76477780251106e-07, + "loss": -0.0242, + "num_tokens": 120119355.0, + "reward": 0.0, + "reward_std": 0.98175448179245, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06656559207925981, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14857408870118705, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1263.5, + "completions/mean_terminated_length": 1247.7333984375, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.6639159789947486, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.695669648533853, + "kl": 0.017730712890625, + "learning_rate": 3.761154164301896e-07, + "loss": -0.0321, + "num_tokens": 120163267.0, + "reward": 0.0, + "reward_std": 0.5651896595954895, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02163426966518641, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062307470888411376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1380.625, + "completions/mean_terminated_length": 1287.77783203125, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.6641660415103776, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7444490521234273, + "kl": 0.0181884765625, + "learning_rate": 3.7575318510354546e-07, + "loss": 0.0166, + "num_tokens": 120207589.0, + "reward": 0.0, + "reward_std": 0.7857463955879211, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010054681949769323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12271578886839746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1129.4375, + "completions/mean_terminated_length": 1104.7333984375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.6644161040260065, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.976631775952348, + "kl": 0.016143798828125, + "learning_rate": 3.7539108654718175e-07, + "loss": -0.0071, + "num_tokens": 120255460.0, + "reward": 0.0, + "reward_std": 1.0286458730697632, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10319240567680191, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14122142555133305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1447.75, + "completions/mean_terminated_length": 1332.800048828125, + "completions/min_length": 1197.0, + "completions/min_terminated_length": 1197.0, + "epoch": 0.6646661665416355, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.070343759895366, + "kl": 0.025848388671875, + "learning_rate": 3.750291210370057e-07, + "loss": -0.0025, + "num_tokens": 120301760.0, + "reward": 0.0, + "reward_std": 0.5378192663192749, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.21550612454149753, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2831484353474739, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1285.1875, + "completions/mean_terminated_length": 1118.111083984375, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.6649162290572643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3218468695936076, + "kl": 0.012237548828125, + "learning_rate": 3.7466728884882306e-07, + "loss": 0.0065, + "num_tokens": 120352915.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4099141061306, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07464646722962222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12263209853585398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14401645996461915, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1373.125, + "completions/mean_terminated_length": 1274.4444580078125, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.6651662915728932, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2661086717205148, + "kl": 0.0091552734375, + "learning_rate": 3.743055902583373e-07, + "loss": 0.0277, + "num_tokens": 120402389.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0662798881530762, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18452757527219388, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09724456643352652, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 1198.5625, + "completions/mean_terminated_length": 1061.5455322265625, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.6654163540885222, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.072954571059516, + "kl": 0.028228759765625, + "learning_rate": 3.7394402554115147e-07, + "loss": 0.0174, + "num_tokens": 120453254.0, + "reward": 0.0, + "reward_std": 0.7107806205749512, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.047806501982659545, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08363058713213285, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1154.4375, + "completions/mean_terminated_length": 1154.4375, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.665666416604151, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.169261648516551, + "kl": 0.020477294921875, + "learning_rate": 3.735825949727657e-07, + "loss": -0.0081, + "num_tokens": 120494685.0, + "reward": 0.0, + "reward_std": 0.8893247842788696, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022997196232560413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05962481302064434, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 1280.0625, + "completions/mean_terminated_length": 1060.125, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.6659164791197799, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7763853201910464, + "kl": 0.0175018310546875, + "learning_rate": 3.732212988285779e-07, + "loss": -0.0081, + "num_tokens": 120547622.0, + "reward": 0.0, + "reward_std": 1.042985439300537, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12143028145584556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18146031263782608, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14395215254459456, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1211.1875, + "completions/mean_terminated_length": 986.5555419921875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.6661665416354089, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4926768353053235, + "kl": 0.0172576904296875, + "learning_rate": 3.7286013738388347e-07, + "loss": -0.037, + "num_tokens": 120604769.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0429022312164307, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11776950222535149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19149882810749339, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14194417264596723, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1184.0, + "completions/mean_terminated_length": 1078.666748046875, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.6664166041510378, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.332300598068354, + "kl": 0.0116119384765625, + "learning_rate": 3.72499110913876e-07, + "loss": 0.0277, + "num_tokens": 120643817.0, + "reward": 0.0, + "reward_std": 0.6387683153152466, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0744974253426234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1799179261473328, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1343709624716425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1134.875, + "completions/mean_terminated_length": 1134.875, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.6666666666666666, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.339198083582932, + "kl": 0.01904296875, + "learning_rate": 3.7213821969364533e-07, + "loss": -0.0419, + "num_tokens": 120684151.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8888421058654785, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12142686418322848, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16564266720908544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101763, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1168.5, + "completions/mean_terminated_length": 1168.5, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.6669167291822956, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5150544054931636, + "kl": 0.02154541015625, + "learning_rate": 3.7177746399817853e-07, + "loss": -0.0114, + "num_tokens": 120722135.0, + "reward": 0.0, + "reward_std": 0.8416422605514526, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006212185602703271, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03274311787172654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1190.125, + "completions/mean_terminated_length": 1169.4666748046875, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.6671667916979245, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.605810219239733, + "kl": 0.022796630859375, + "learning_rate": 3.7141684410235976e-07, + "loss": -0.0178, + "num_tokens": 120768321.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9229968190193176, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03614828750624949, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07826982516239309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1401.25, + "completions/mean_terminated_length": 1342.0, + "completions/min_length": 1169.0, + "completions/min_terminated_length": 1169.0, + "epoch": 0.6674168542135533, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.621835410822344, + "kl": 0.01177978515625, + "learning_rate": 3.71056360280969e-07, + "loss": 0.0144, + "num_tokens": 120821101.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8901538848876953, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008902771645082394, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06194194455413276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1037.375, + "completions/mean_terminated_length": 1037.375, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.6676669167291823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6840544859815396, + "kl": 0.01275634765625, + "learning_rate": 3.7069601280868326e-07, + "loss": 0.02, + "num_tokens": 120857563.0, + "reward": -3.725290298461914e-08, + "reward_std": 0.9587996006011963, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.001530915865792256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09752072752896702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1271.625, + "completions/mean_terminated_length": 1167.8182373046875, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.6679169792448112, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2230275403540847, + "kl": 0.021514892578125, + "learning_rate": 3.70335801960075e-07, + "loss": -0.0936, + "num_tokens": 120911037.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9748387336730957, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007089913650458348, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09812037719520579, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1474.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1088.625, + "completions/mean_terminated_length": 1088.625, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.6681670417604401, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0127133946284075, + "kl": 0.017791748046875, + "learning_rate": 3.699757280096132e-07, + "loss": 0.0371, + "num_tokens": 120960423.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7515195608139038, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012385075993007474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06715004015021019, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14291929864761416, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1203.0, + "completions/max_terminated_length": 1203.0, + "completions/mean_length": 1058.1875, + "completions/mean_terminated_length": 1058.1875, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.668417104276069, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5873909176841106, + "kl": 0.011077880859375, + "learning_rate": 3.696157912316621e-07, + "loss": -0.0549, + "num_tokens": 121000266.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0467177629470825, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040295060326014605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07326210581656979, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1081.75, + "completions/mean_terminated_length": 942.3333740234375, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.6686671667916979, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6282581862234444, + "kl": 0.01556396484375, + "learning_rate": 3.692559919004816e-07, + "loss": 0.0025, + "num_tokens": 121046302.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.793614387512207, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04564959182138966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07008039332813695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1240.3125, + "completions/mean_terminated_length": 1223.0001220703125, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.6689172293073268, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.936162189221416, + "kl": 0.0167083740234375, + "learning_rate": 3.6889633029022684e-07, + "loss": 0.002, + "num_tokens": 121086115.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5872515439987183, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06839247380693236, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10546457068935178, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1291.75, + "completions/mean_terminated_length": 1222.3333740234375, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.6691672918229558, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.241592889984401, + "kl": 0.015777587890625, + "learning_rate": 3.6853680667494814e-07, + "loss": -0.0265, + "num_tokens": 121137039.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5154055953025818, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24354160302299074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.185790767863407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0917928424547684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1291.75, + "completions/mean_terminated_length": 1222.3333740234375, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.6694173543385846, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0507144424124495, + "kl": 0.01519775390625, + "learning_rate": 3.681774213285904e-07, + "loss": 0.0173, + "num_tokens": 121183451.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0396714210510254, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09927994113214832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10135286247132307, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1147.25, + "completions/mean_terminated_length": 1065.84619140625, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.6696674168542136, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.290576504560391, + "kl": 0.01776123046875, + "learning_rate": 3.678181745249931e-07, + "loss": -0.0478, + "num_tokens": 121226871.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6960647106170654, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07419759885564044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08781834469767925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 1097.8125, + "completions/mean_terminated_length": 1097.8125, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.6699174793698425, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6356614897605812, + "kl": 0.010772705078125, + "learning_rate": 3.6745906653789073e-07, + "loss": -0.0153, + "num_tokens": 121274684.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0653060674667358, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038094514689296435, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15689824440293076, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941137, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1397.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1088.75, + "completions/mean_terminated_length": 1088.75, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.6701675418854713, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4200218462271663, + "kl": 0.014923095703125, + "learning_rate": 3.6710009764091143e-07, + "loss": -0.0078, + "num_tokens": 121317776.0, + "reward": 0.0, + "reward_std": 1.007656216621399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04528626781844647, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05078100654943483, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1190.5, + "completions/mean_terminated_length": 1049.8182373046875, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.6704176044011003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5676072140445925, + "kl": 0.0167388916015625, + "learning_rate": 3.6674126810757753e-07, + "loss": 0.0263, + "num_tokens": 121374064.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9374393820762634, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027579728516128298, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08112123178582782, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1166.6875, + "completions/mean_terminated_length": 1166.6875, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.6706676669167292, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2484913555163266, + "kl": 0.01776123046875, + "learning_rate": 3.6638257821130546e-07, + "loss": -0.0104, + "num_tokens": 121417651.0, + "reward": 0.0, + "reward_std": 1.0238916873931885, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.014830653642187603, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13867207115963742, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1309.375, + "completions/mean_terminated_length": 1245.8333740234375, + "completions/min_length": 1067.0, + "completions/min_terminated_length": 1067.0, + "epoch": 0.6709177294323581, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.099335508295256, + "kl": 0.02008056640625, + "learning_rate": 3.660240282254048e-07, + "loss": 0.0069, + "num_tokens": 121470849.0, + "reward": 0.0, + "reward_std": 0.9863029718399048, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00687002215507373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027578316017392062, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1191.6875, + "completions/mean_terminated_length": 1171.1334228515625, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.671167791947987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1842920946115907, + "kl": 0.01788330078125, + "learning_rate": 3.6566561842307877e-07, + "loss": -0.0261, + "num_tokens": 121517012.0, + "reward": 0.0, + "reward_std": 0.9435462355613708, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018940551876279597, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06835037502525984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1048.875, + "completions/mean_terminated_length": 984.4285888671875, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.6714178544636159, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.100784905148401, + "kl": 0.020172119140625, + "learning_rate": 3.653073490774236e-07, + "loss": -0.0239, + "num_tokens": 121558210.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7194000482559204, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08690838709084296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12621786711164454, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1181.0, + "completions/mean_terminated_length": 1181.0, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.6716679169792448, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7246794101918717, + "kl": 0.01450347900390625, + "learning_rate": 3.6494922046142874e-07, + "loss": -0.0128, + "num_tokens": 121591930.0, + "reward": 0.0, + "reward_std": 0.7546436786651611, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020717024987400294, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056028154082777865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202952, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1015.5, + "completions/mean_terminated_length": 1015.5, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.6719179794948738, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5715265521547153, + "kl": 0.019683837890625, + "learning_rate": 3.645912328479763e-07, + "loss": 0.0091, + "num_tokens": 121639314.0, + "reward": 0.0, + "reward_std": 0.7320247888565063, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02124585264930854, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10912507148745487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1242.5, + "completions/mean_terminated_length": 1042.2222900390625, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.6721680420105026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.127924530575951, + "kl": 0.014801025390625, + "learning_rate": 3.642333865098409e-07, + "loss": 0.0217, + "num_tokens": 121682914.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.324258029460907, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0022065510700068174, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08454672485183096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1362.9375, + "completions/mean_terminated_length": 1186.71435546875, + "completions/min_length": 494.0, + "completions/min_terminated_length": 494.0, + "epoch": 0.6724181045261315, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0417974548189552, + "kl": 0.01385498046875, + "learning_rate": 3.6387568171968965e-07, + "loss": 0.0066, + "num_tokens": 121741705.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0631362199783325, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004859828771402426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16107281063554035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1103.75, + "completions/mean_terminated_length": 1012.3077392578125, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.6726681670417605, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.990617194434671, + "kl": 0.0170440673828125, + "learning_rate": 3.6351811875008163e-07, + "loss": 0.0565, + "num_tokens": 121791173.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0323952436447144, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02194334248816528, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07056318323400543, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1406.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1215.6875, + "completions/mean_terminated_length": 1215.6875, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.6729182295573893, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5712012780695463, + "kl": 0.014801025390625, + "learning_rate": 3.631606978734679e-07, + "loss": 0.0027, + "num_tokens": 121835248.0, + "reward": 0.0, + "reward_std": 0.6537569761276245, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03694079020685474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1140927399129103, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1347.5, + "completions/mean_terminated_length": 1256.0, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.6731682920730182, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.924606724587911, + "kl": 0.0162506103515625, + "learning_rate": 3.628034193621913e-07, + "loss": 0.0093, + "num_tokens": 121897432.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6522361040115356, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01633924263241421, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.020292077405665913, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619461, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1395.9375, + "completions/mean_terminated_length": 1262.1429443359375, + "completions/min_length": 1114.0, + "completions/min_terminated_length": 1114.0, + "epoch": 0.6734183545886472, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.365635422090836, + "kl": 0.0167388916015625, + "learning_rate": 3.6244628348848627e-07, + "loss": 0.0115, + "num_tokens": 121953775.0, + "reward": 0.0, + "reward_std": 0.838664174079895, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10636573978096268, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19366044475111238, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1275.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1044.9375, + "completions/mean_terminated_length": 1044.9375, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.673668417104276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7358061098090194, + "kl": 0.0157623291015625, + "learning_rate": 3.6208929052447846e-07, + "loss": -0.0383, + "num_tokens": 121988942.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8557125329971313, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04647393552189227, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11125046740887067, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1200.375, + "completions/mean_terminated_length": 1180.4000244140625, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.673918479619905, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9437437009758483, + "kl": 0.0084075927734375, + "learning_rate": 3.6173244074218446e-07, + "loss": -0.0193, + "num_tokens": 122034292.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0350899696350098, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06319836249304131, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17123724555638148, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459203, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 1080.1875, + "completions/mean_terminated_length": 889.3636474609375, + "completions/min_length": 519.0, + "completions/min_terminated_length": 519.0, + "epoch": 0.6741685421355339, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5326644886094747, + "kl": 0.016082763671875, + "learning_rate": 3.6137573441351223e-07, + "loss": -0.0252, + "num_tokens": 122087871.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.865774393081665, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02184260878572907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17664641854337737, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 988.0, + "completions/mean_length": 868.1875, + "completions/mean_terminated_length": 826.0667114257812, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.6744186046511628, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6717438052124676, + "kl": 0.02001953125, + "learning_rate": 3.6101917181025986e-07, + "loss": -0.035, + "num_tokens": 122116874.0, + "reward": 0.0, + "reward_std": 0.7294286489486694, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04532875726353715, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0982160756290172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1039.75, + "completions/mean_terminated_length": 1039.75, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.6746686671667917, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1296825779433237, + "kl": 0.01837158203125, + "learning_rate": 3.6066275320411634e-07, + "loss": 0.0204, + "num_tokens": 122157614.0, + "reward": 0.0, + "reward_std": 0.38172656297683716, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015253383579030399, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13806081960900365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1199.125, + "completions/mean_terminated_length": 1156.1429443359375, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.6749187296824206, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.438723607121624, + "kl": 0.022308349609375, + "learning_rate": 3.6030647886666056e-07, + "loss": -0.0137, + "num_tokens": 122190136.0, + "reward": 2.60770320892334e-08, + "reward_std": 0.9834088087081909, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016799890438982633, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.019736099204982573, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 910.875, + "completions/mean_terminated_length": 910.875, + "completions/min_length": 516.0, + "completions/min_terminated_length": 516.0, + "epoch": 0.6751687921980495, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.203101363962012, + "kl": 0.0184783935546875, + "learning_rate": 3.599503490693622e-07, + "loss": -0.0149, + "num_tokens": 122215862.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0654774904251099, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08709881740194717, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050670480208599634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1234.9375, + "completions/mean_terminated_length": 1114.45458984375, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.6754188547136785, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9530645631992947, + "kl": 0.0157623291015625, + "learning_rate": 3.595943640835799e-07, + "loss": -0.0067, + "num_tokens": 122264621.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.043255090713501, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13908599617107217, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10967565154478891, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1256.0, + "completions/max_terminated_length": 1256.0, + "completions/mean_length": 998.5, + "completions/mean_terminated_length": 998.5, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.6756689172293073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9003545521006524, + "kl": 0.013214111328125, + "learning_rate": 3.592385241805628e-07, + "loss": 0.0468, + "num_tokens": 122304749.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0508443117141724, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01159398665964617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19040720920600315, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1447.75, + "completions/mean_terminated_length": 1380.571533203125, + "completions/min_length": 1163.0, + "completions/min_terminated_length": 1163.0, + "epoch": 0.6759189797449362, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3965253694929776, + "kl": 0.014801025390625, + "learning_rate": 3.5888282963144854e-07, + "loss": -0.01, + "num_tokens": 122352929.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9417842030525208, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018096393035247255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09171140046749317, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 1271.75, + "completions/mean_terminated_length": 1094.2222900390625, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.6761690422605652, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.910697205880899, + "kl": 0.0097503662109375, + "learning_rate": 3.58527280707265e-07, + "loss": -0.0306, + "num_tokens": 122402437.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.015303373336792, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015405780667690924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06924989014893378, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1195.3125, + "completions/mean_terminated_length": 1151.7857666015625, + "completions/min_length": 974.0, + "completions/min_terminated_length": 974.0, + "epoch": 0.676419104776194, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.853422993642348, + "kl": 0.011688232421875, + "learning_rate": 3.581718776789284e-07, + "loss": 0.0313, + "num_tokens": 122444418.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0484932661056519, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10402997155998167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08177770206765206, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1492.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1159.125, + "completions/mean_terminated_length": 1159.125, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.6766691672918229, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2675019397968663, + "kl": 0.0139617919921875, + "learning_rate": 3.5781662081724405e-07, + "loss": -0.0334, + "num_tokens": 122479060.0, + "reward": 0.0, + "reward_std": 0.8038930892944336, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035981066713161765, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08594807159285485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1164.875, + "completions/mean_terminated_length": 1012.5454711914062, + "completions/min_length": 753.0, + "completions/min_terminated_length": 753.0, + "epoch": 0.6769192298074519, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.16854278623862, + "kl": 0.02105712890625, + "learning_rate": 3.574615103929061e-07, + "loss": -0.0245, + "num_tokens": 122528066.0, + "reward": 0.0, + "reward_std": 0.5396085977554321, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16076480831368975, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3277753179345752, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784837, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1343.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1106.25, + "completions/mean_terminated_length": 1106.25, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.6771692923230808, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3333494313763574, + "kl": 0.016510009765625, + "learning_rate": 3.5710654667649675e-07, + "loss": -0.0044, + "num_tokens": 122563198.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8145296573638916, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.17759978233755147, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3610386575356704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1271.125, + "completions/mean_terminated_length": 1194.8333740234375, + "completions/min_length": 1085.0, + "completions/min_terminated_length": 1085.0, + "epoch": 0.6774193548387096, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0236528872890185, + "kl": 0.0159912109375, + "learning_rate": 3.5675172993848613e-07, + "loss": 0.0069, + "num_tokens": 122614664.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6862202286720276, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02065054781318132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06137186922428432, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1129.0625, + "completions/mean_terminated_length": 1104.3333740234375, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.6776694173543386, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.322137707744963, + "kl": 0.015716552734375, + "learning_rate": 3.5639706044923347e-07, + "loss": -0.0501, + "num_tokens": 122657473.0, + "reward": 0.0, + "reward_std": 0.8364112973213196, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017537885981356964, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09359590421153971, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1275.3125, + "completions/mean_terminated_length": 1200.416748046875, + "completions/min_length": 990.0, + "completions/min_terminated_length": 990.0, + "epoch": 0.6779194798699675, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5240428717843653, + "kl": 0.0112152099609375, + "learning_rate": 3.560425384789848e-07, + "loss": -0.0366, + "num_tokens": 122703886.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9708830714225769, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07930473005484279, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11774857375910139, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1173.0, + "completions/max_terminated_length": 1173.0, + "completions/mean_length": 997.3125, + "completions/mean_terminated_length": 997.3125, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.6781695423855963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4166238984914847, + "kl": 0.02288818359375, + "learning_rate": 3.5568816429787405e-07, + "loss": -0.0171, + "num_tokens": 122740091.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0227668285369873, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042145130728699326, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11793808930915026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1258305739211792, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1332.9375, + "completions/mean_terminated_length": 1232.7000732421875, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.6784196049012253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.24640606942673, + "kl": 0.020294189453125, + "learning_rate": 3.5533393817592275e-07, + "loss": 0.0058, + "num_tokens": 122789370.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9053841829299927, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04559272176508689, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057769493307368074, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1305.875, + "completions/mean_terminated_length": 1261.0770263671875, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.6786696674168542, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.955717486446063, + "kl": 0.0152435302734375, + "learning_rate": 3.549798603830393e-07, + "loss": -0.0323, + "num_tokens": 122836096.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9031736254692078, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11044402637962501, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0952559106581359, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14548768561863465, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1329.25, + "completions/mean_terminated_length": 1289.84619140625, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.6789197299324832, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7920347992964674, + "kl": 0.01593017578125, + "learning_rate": 3.5462593118901916e-07, + "loss": 0.0074, + "num_tokens": 122874996.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.863773763179779, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08721905542274619, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06193940677063021, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1151.125, + "completions/mean_terminated_length": 1127.86669921875, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.679169792448112, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0855270672286568, + "kl": 0.019744873046875, + "learning_rate": 3.5427215086354456e-07, + "loss": 0.013, + "num_tokens": 122926694.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9154713153839111, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06475034388221718, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07010045951784309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1211.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 1001.75, + "completions/mean_terminated_length": 1001.75, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.6794198549637409, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.366805567735963, + "kl": 0.019378662109375, + "learning_rate": 3.5391851967618456e-07, + "loss": 0.0211, + "num_tokens": 122966426.0, + "reward": 0.0, + "reward_std": 0.6723920702934265, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008086537030204692, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06599852339079425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1157.1875, + "completions/mean_terminated_length": 1108.21435546875, + "completions/min_length": 768.0, + "completions/min_terminated_length": 768.0, + "epoch": 0.6796699174793699, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.841836686408547, + "kl": 0.0157012939453125, + "learning_rate": 3.535650378963944e-07, + "loss": -0.0417, + "num_tokens": 123007693.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5313202142715454, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022613551276931307, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0720940131221362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1391.5625, + "completions/mean_terminated_length": 1153.0, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.6799199799949988, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2756817993486447, + "kl": 0.013629913330078125, + "learning_rate": 3.532117057935151e-07, + "loss": -0.0245, + "num_tokens": 123046398.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7076613903045654, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012050275778980567, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.48414632098924554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1358.4375, + "completions/mean_terminated_length": 1294.0909423828125, + "completions/min_length": 1048.0, + "completions/min_terminated_length": 1048.0, + "epoch": 0.6801700425106276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4605039029191564, + "kl": 0.01055908203125, + "learning_rate": 3.5285852363677426e-07, + "loss": 0.0088, + "num_tokens": 123091077.0, + "reward": 0.0, + "reward_std": 0.4964600205421448, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06533427429802477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13143976110374925, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 907.0625, + "completions/mean_terminated_length": 907.0625, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.6804201050262566, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0064419244472926, + "kl": 0.0126800537109375, + "learning_rate": 3.5250549169528485e-07, + "loss": -0.0447, + "num_tokens": 123130038.0, + "reward": 0.0, + "reward_std": 0.904825747013092, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05598443816270076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09230130506842359, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437974, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1200.1875, + "completions/mean_terminated_length": 1131.0, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.6806701675418855, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.237191776853671, + "kl": 0.010162353515625, + "learning_rate": 3.5215261023804567e-07, + "loss": -0.0426, + "num_tokens": 123176641.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9213888645172119, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0780664271401254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10327066865301386, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115677, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1089.6875, + "completions/mean_terminated_length": 843.5, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.6809202300575143, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.007720180381011, + "kl": 0.0140838623046875, + "learning_rate": 3.517998795339401e-07, + "loss": -0.0688, + "num_tokens": 123221500.0, + "reward": 0.0, + "reward_std": 0.8217609524726868, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030747549385433226, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07381991719389888, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13109227736669002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1339.5625, + "completions/mean_terminated_length": 1286.0833740234375, + "completions/min_length": 1174.0, + "completions/min_terminated_length": 1174.0, + "epoch": 0.6811702925731433, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3446832898345424, + "kl": 0.0161895751953125, + "learning_rate": 3.5144729985173793e-07, + "loss": -0.0215, + "num_tokens": 123267157.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9894387125968933, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01141153043132604, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03371389616995933, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115676, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1127.0, + "completions/max_terminated_length": 1127.0, + "completions/mean_length": 941.875, + "completions/mean_terminated_length": 941.875, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.6814203550887722, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.342612208098056, + "kl": 0.0122528076171875, + "learning_rate": 3.510948714600929e-07, + "loss": 0.0176, + "num_tokens": 123297027.0, + "reward": 0.0, + "reward_std": 0.926203727722168, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046057595727669244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09142315069868452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.060705726131767695, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1234.1875, + "completions/mean_terminated_length": 1216.4666748046875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.6816704176044011, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.111697237745817, + "kl": 0.01593017578125, + "learning_rate": 3.5074259462754354e-07, + "loss": 0.0222, + "num_tokens": 123338310.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0049562454223633, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1608728548674085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15460909737418826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1012.625, + "completions/mean_terminated_length": 1012.625, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.68192048012003, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.307677745711157, + "kl": 0.02099609375, + "learning_rate": 3.5039046962251295e-07, + "loss": 0.027, + "num_tokens": 123371448.0, + "reward": 0.0, + "reward_std": 0.7208243608474731, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1453623427832438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07341629956453595, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 955.625, + "completions/mean_terminated_length": 955.625, + "completions/min_length": 680.0, + "completions/min_terminated_length": 680.0, + "epoch": 0.6821705426356589, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0964847617855966, + "kl": 0.022430419921875, + "learning_rate": 3.500384967133091e-07, + "loss": -0.0336, + "num_tokens": 123401658.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0136680603027344, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04548032386830055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039694868534180014, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1191.6875, + "completions/mean_terminated_length": 1120.5384521484375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.6824206051512878, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8424414051906166, + "kl": 0.01336669921875, + "learning_rate": 3.496866761681235e-07, + "loss": 0.018, + "num_tokens": 123451077.0, + "reward": 0.0, + "reward_std": 0.538988471031189, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11575917601264298, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15145568559488873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1244.9375, + "completions/mean_terminated_length": 1208.5, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.6826706676669168, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.333393011803426, + "kl": 0.024200439453125, + "learning_rate": 3.493350082550315e-07, + "loss": 0.0268, + "num_tokens": 123497756.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8635338544845581, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0007707365571603293, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09199381616350005, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1058.5625, + "completions/mean_terminated_length": 995.5000610351562, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.6829207301825456, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.092847188751359, + "kl": 0.01287841796875, + "learning_rate": 3.489834932419929e-07, + "loss": -0.1014, + "num_tokens": 123528501.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5892418026924133, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00777020549224072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031131788870703527, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1458.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1101.25, + "completions/mean_terminated_length": 1101.25, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.6831707926981745, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.105510940045592, + "kl": 0.0180816650390625, + "learning_rate": 3.4863213139684994e-07, + "loss": -0.034, + "num_tokens": 123569913.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0485221147537231, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0403951317085979, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07381220140460835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1303.75, + "completions/mean_terminated_length": 1214.5455322265625, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.6834208552138035, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5261274405386582, + "kl": 0.018157958984375, + "learning_rate": 3.482809229873288e-07, + "loss": -0.0453, + "num_tokens": 123624869.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9892751574516296, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016330551389311897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0785003003328752, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1439.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1066.5625, + "completions/mean_terminated_length": 1066.5625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.6836709177294323, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.2928249003045713, + "kl": 0.006855010986328125, + "learning_rate": 3.4792986828103845e-07, + "loss": -0.032, + "num_tokens": 123672494.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0280876159667969, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06245471962553749, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09465786132590671, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1529342632927262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1192.375, + "completions/mean_terminated_length": 1148.4285888671875, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.6839209802450613, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.871782017440985, + "kl": 0.014984130859375, + "learning_rate": 3.475789675454713e-07, + "loss": -0.0579, + "num_tokens": 123716596.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9970670938491821, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019851532403880438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10268305018769686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1235.0, + "completions/mean_length": 1284.5625, + "completions/mean_terminated_length": 1069.125, + "completions/min_length": 928.0, + "completions/min_terminated_length": 928.0, + "epoch": 0.6841710427606902, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2759868925174285, + "kl": 0.0171051025390625, + "learning_rate": 3.4722822104800175e-07, + "loss": -0.0114, + "num_tokens": 123764653.0, + "reward": 0.0, + "reward_std": 0.5190559029579163, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10281207029025556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22728971324872518, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1168.5, + "completions/mean_terminated_length": 1146.4000244140625, + "completions/min_length": 921.0, + "completions/min_terminated_length": 921.0, + "epoch": 0.684421105276319, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.849213312543279, + "kl": 0.01580810546875, + "learning_rate": 3.4687762905588715e-07, + "loss": -0.0182, + "num_tokens": 123798285.0, + "reward": 0.0, + "reward_std": 0.8634228706359863, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04580190178140172, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060873240268373846, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1269.3125, + "completions/mean_terminated_length": 1130.9000244140625, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.684671167791948, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.210803077292359, + "kl": 0.0158538818359375, + "learning_rate": 3.465271918362668e-07, + "loss": -0.0127, + "num_tokens": 123857698.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0596448183059692, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0957598831855994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13592713639261003, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1351.1875, + "completions/mean_terminated_length": 1235.4444580078125, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.6849212303075769, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0803711994735403, + "kl": 0.017333984375, + "learning_rate": 3.461769096561622e-07, + "loss": -0.0277, + "num_tokens": 123912869.0, + "reward": 0.0, + "reward_std": 1.028899908065796, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03550682381006019, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046163638633231904, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1257.5625, + "completions/mean_terminated_length": 1201.615478515625, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.6851712928232058, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6632701325794623, + "kl": 0.00998687744140625, + "learning_rate": 3.458267827824769e-07, + "loss": -0.0512, + "num_tokens": 123954694.0, + "reward": 0.0, + "reward_std": 0.7001150846481323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0363283160675749, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1341540293344089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045227, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1133.9375, + "completions/mean_terminated_length": 967.5454711914062, + "completions/min_length": 549.0, + "completions/min_terminated_length": 549.0, + "epoch": 0.6854213553388347, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1129210954317093, + "kl": 0.014556884765625, + "learning_rate": 3.4547681148199565e-07, + "loss": -0.0523, + "num_tokens": 123996429.0, + "reward": -3.725290298461914e-08, + "reward_std": 0.953366756439209, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032353979936350356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057462631767498266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1164.0, + "completions/mean_length": 1202.875, + "completions/mean_terminated_length": 971.7777709960938, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.6856714178544636, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9662464560812314, + "kl": 0.01470947265625, + "learning_rate": 3.4512699602138523e-07, + "loss": 0.0005, + "num_tokens": 124027331.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.3469626009464264, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07107654414993347, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08286190714398028, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14194417264596723, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1145.0, + "completions/mean_terminated_length": 1094.2857666015625, + "completions/min_length": 165.0, + "completions/min_terminated_length": 165.0, + "epoch": 0.6859214803700925, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6911154976910514, + "kl": 0.02374267578125, + "learning_rate": 3.4477733666719347e-07, + "loss": 0.0503, + "num_tokens": 124066547.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9104742407798767, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0588807017440799, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11650913621874588, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1277.25, + "completions/mean_terminated_length": 1176.0, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.6861715428857215, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6342057781690702, + "kl": 0.007774770259857178, + "learning_rate": 3.444278336858487e-07, + "loss": 0.0375, + "num_tokens": 124117911.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0177119970321655, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19135284182137074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15718811382093084, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05962847939999442, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1247.8125, + "completions/mean_terminated_length": 1231.0001220703125, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.6864216054013503, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.090331992815985, + "kl": 0.0164947509765625, + "learning_rate": 3.44078487343661e-07, + "loss": 0.0144, + "num_tokens": 124159076.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0514845848083496, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1189.125, + "completions/mean_terminated_length": 1047.8182373046875, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.6866716679169792, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.501803845043994, + "kl": 0.021392822265625, + "learning_rate": 3.4372929790682046e-07, + "loss": -0.0091, + "num_tokens": 124201894.0, + "reward": 0.0, + "reward_std": 0.7878705263137817, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015460782432564447, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027100796694214713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17018508443151817, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1035.0, + "completions/max_terminated_length": 1035.0, + "completions/mean_length": 905.6875, + "completions/mean_terminated_length": 905.6875, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.6869217304326082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.89659097250972, + "kl": 0.0146331787109375, + "learning_rate": 3.4338026564139797e-07, + "loss": -0.0183, + "num_tokens": 124233577.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9892677068710327, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023586514536966426, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06429789890439452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14751020052613062, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1205.0, + "completions/mean_terminated_length": 1185.3333740234375, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.687171792948237, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2458437187711966, + "kl": 0.023284912109375, + "learning_rate": 3.430313908133445e-07, + "loss": -0.0357, + "num_tokens": 124279529.0, + "reward": 0.0, + "reward_std": 0.8592180609703064, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.057013424676404766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07771232672376555, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1440.4375, + "completions/mean_terminated_length": 1261.75, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.6874218554638659, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.655611384017338, + "kl": 0.0162506103515625, + "learning_rate": 3.426826736884909e-07, + "loss": 0.0052, + "num_tokens": 124335720.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.3439657390117645, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15176273684293978, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21171167097619215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1329.25, + "completions/mean_terminated_length": 1272.3333740234375, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.6876719179794949, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2925321596578874, + "kl": 0.0184326171875, + "learning_rate": 3.4233411453254816e-07, + "loss": -0.0373, + "num_tokens": 124393700.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0198737382888794, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03980169067363343, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12915570763899853, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1332.0625, + "completions/mean_terminated_length": 1116.1429443359375, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.6879219804951238, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0691743934740887, + "kl": 0.012908935546875, + "learning_rate": 3.4198571361110666e-07, + "loss": -0.0053, + "num_tokens": 124453901.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0508397817611694, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09832669485279011, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07956579907123143, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15776212754932312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1453.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1128.9375, + "completions/mean_terminated_length": 1128.9375, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.6881720430107527, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.836886957044039, + "kl": 0.024169921875, + "learning_rate": 3.416374711896365e-07, + "loss": -0.0476, + "num_tokens": 124505484.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.8797335624694824, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19482166496735323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2578725988411778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 988.125, + "completions/mean_terminated_length": 988.125, + "completions/min_length": 832.0, + "completions/min_terminated_length": 832.0, + "epoch": 0.6884221055263816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7654782189930383, + "kl": 0.0135345458984375, + "learning_rate": 3.412893875334867e-07, + "loss": -0.007, + "num_tokens": 124547758.0, + "reward": 0.0, + "reward_std": 0.6016427278518677, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03165596583154108, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04966750060148478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1337.75, + "completions/mean_terminated_length": 1129.1429443359375, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.6886721680420105, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7198683493874736, + "kl": 0.0137176513671875, + "learning_rate": 3.409414629078855e-07, + "loss": -0.056, + "num_tokens": 124611770.0, + "reward": 0.0, + "reward_std": 0.7077142596244812, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06309284276296527, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08898915821501607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1214.875, + "completions/mean_terminated_length": 1195.86669921875, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.6889222305576395, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8391977733937805, + "kl": 0.019805908203125, + "learning_rate": 3.405936975779399e-07, + "loss": -0.0464, + "num_tokens": 124657288.0, + "reward": 0.0, + "reward_std": 0.8396292328834534, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012317608026552374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1241529308803984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0665276327996565, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1351.625, + "completions/mean_terminated_length": 1262.5999755859375, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.6891722930732683, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5115844875078928, + "kl": 0.0124359130859375, + "learning_rate": 3.4024609180863557e-07, + "loss": -0.0234, + "num_tokens": 124713050.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.941968560218811, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.24690679003009003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1302.0, + "completions/max_terminated_length": 1302.0, + "completions/mean_length": 870.1875, + "completions/mean_terminated_length": 870.1875, + "completions/min_length": 498.0, + "completions/min_terminated_length": 498.0, + "epoch": 0.6894223555888972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9779471751356694, + "kl": 0.023773193359375, + "learning_rate": 3.398986458648364e-07, + "loss": -0.0216, + "num_tokens": 124763829.0, + "reward": 0.0, + "reward_std": 0.5082966685295105, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1347471158581273, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1486565664285557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1420.0, + "completions/max_terminated_length": 1420.0, + "completions/mean_length": 1078.0, + "completions/mean_terminated_length": 1078.0, + "completions/min_length": 663.0, + "completions/min_terminated_length": 663.0, + "epoch": 0.6896724181045262, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4681779174122243, + "kl": 0.01904296875, + "learning_rate": 3.395513600112845e-07, + "loss": 0.0509, + "num_tokens": 124799005.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7673960328102112, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06304164307049341, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0891025018137843, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1475.125, + "completions/mean_terminated_length": 1367.3333740234375, + "completions/min_length": 1275.0, + "completions/min_terminated_length": 1275.0, + "epoch": 0.689922480620155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.878319300565035, + "kl": 0.01806640625, + "learning_rate": 3.3920423451260054e-07, + "loss": 0.0114, + "num_tokens": 124857719.0, + "reward": 0.0, + "reward_std": 0.636101484298706, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07350164137283467, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09654833610888391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1343709624716425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1428.75, + "completions/mean_terminated_length": 1357.5, + "completions/min_length": 1234.0, + "completions/min_terminated_length": 1234.0, + "epoch": 0.6901725431357839, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.47081241766298, + "kl": 0.01129150390625, + "learning_rate": 3.3885726963328243e-07, + "loss": -0.0113, + "num_tokens": 124913211.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8958820700645447, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012947599827448412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12482502690326817, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1254.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 938.1875, + "completions/mean_terminated_length": 938.1875, + "completions/min_length": 762.0, + "completions/min_terminated_length": 762.0, + "epoch": 0.6904226056514129, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.043181584500242, + "kl": 0.021484375, + "learning_rate": 3.3851046563770614e-07, + "loss": 0.0036, + "num_tokens": 124955542.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8834588527679443, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06841036764397958, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15492268192569308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1397749513934347, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1222.0, + "completions/max_terminated_length": 1222.0, + "completions/mean_length": 1005.0625, + "completions/mean_terminated_length": 1005.0625, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.6906726681670418, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.5698549723813133, + "kl": 0.011993408203125, + "learning_rate": 3.3816382279012414e-07, + "loss": 0.0224, + "num_tokens": 124990775.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9047627449035645, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12396635166316482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1295181889077571, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1465.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 988.375, + "completions/mean_terminated_length": 988.375, + "completions/min_length": 644.0, + "completions/min_terminated_length": 644.0, + "epoch": 0.6909227306826706, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.787779114619913, + "kl": 0.014129638671875, + "learning_rate": 3.378173413546673e-07, + "loss": -0.024, + "num_tokens": 125031509.0, + "reward": 0.0, + "reward_std": 0.49691349267959595, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04934998620808302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14385170000080008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 856.0, + "completions/mean_terminated_length": 856.0, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.6911727931982996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.309792410254782, + "kl": 0.0171356201171875, + "learning_rate": 3.374710215953428e-07, + "loss": 0.0049, + "num_tokens": 125069749.0, + "reward": 0.0, + "reward_std": 0.8750435709953308, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06382836603760784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12465690618355932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18678567634829202, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1055.125, + "completions/mean_terminated_length": 1055.125, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.6914228557139285, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5945533587063268, + "kl": 0.02032470703125, + "learning_rate": 3.3712486377603454e-07, + "loss": -0.068, + "num_tokens": 125105119.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0670452117919922, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1468.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1202.0, + "completions/mean_terminated_length": 1202.0, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.6916729182295573, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1211471065601906, + "kl": 0.009246826171875, + "learning_rate": 3.36778868160504e-07, + "loss": -0.0056, + "num_tokens": 125146767.0, + "reward": 0.0, + "reward_std": 0.5993356704711914, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08613723234342707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06374671752476492, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 994.0, + "completions/mean_terminated_length": 960.2667236328125, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.6919229807451863, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8647122621934216, + "kl": 0.022216796875, + "learning_rate": 3.3643303501238765e-07, + "loss": 0.0094, + "num_tokens": 125191575.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9881387948989868, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009339435175730004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12000687953110067, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1121.0, + "completions/max_terminated_length": 1121.0, + "completions/mean_length": 900.8125, + "completions/mean_terminated_length": 900.8125, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.6921730432608152, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.406153455248064, + "kl": 0.01922607421875, + "learning_rate": 3.3608736459519894e-07, + "loss": -0.0216, + "num_tokens": 125233340.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9418033361434937, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04418875392123504, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03210789293459288, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1273.3125, + "completions/mean_terminated_length": 1097.0, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.6924231057764441, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9565823312446464, + "kl": 0.01422119140625, + "learning_rate": 3.357418571723273e-07, + "loss": -0.0266, + "num_tokens": 125285865.0, + "reward": 0.0, + "reward_std": 0.9775973558425903, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1990790931590931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23023436750943135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12583057392117916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1288.75, + "completions/mean_terminated_length": 1162.0, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.692673168292073, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1266628770732554, + "kl": 0.01849365234375, + "learning_rate": 3.353965130070381e-07, + "loss": 0.0014, + "num_tokens": 125333141.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.529407799243927, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027114795540074996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07562497570310979, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1222.0, + "completions/mean_terminated_length": 1203.4666748046875, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.6929232308077019, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.857742314870041, + "kl": 0.01434326171875, + "learning_rate": 3.3505133236247185e-07, + "loss": -0.0051, + "num_tokens": 125375125.0, + "reward": 0.0, + "reward_std": 0.9846612215042114, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012156579645406233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10931745070737083, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1184.875, + "completions/mean_terminated_length": 1079.8333740234375, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.6931732933233309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1957941030974433, + "kl": 0.0196075439453125, + "learning_rate": 3.34706315501645e-07, + "loss": 0.0174, + "num_tokens": 125419755.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8131502866744995, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13860088419158803, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10510713207932759, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1211.6875, + "completions/mean_terminated_length": 1211.6875, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.6934233558389598, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.603800256501666, + "kl": 0.011077880859375, + "learning_rate": 3.3436146268744865e-07, + "loss": -0.0037, + "num_tokens": 125457974.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0246613025665283, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08280279889605054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07420996927345756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17121569675358278, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1141.25, + "completions/mean_terminated_length": 1090.0, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.6936734183545886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0221505793664067, + "kl": 0.0177459716796875, + "learning_rate": 3.3401677418264933e-07, + "loss": -0.0994, + "num_tokens": 125506962.0, + "reward": 0.0, + "reward_std": 1.036789894104004, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015084855960926482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.031316621772062625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1134.125, + "completions/mean_terminated_length": 1081.857177734375, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.6939234808702176, + "frac_reward_zero_std": 0.0, + "grad_norm": 10.576489926298288, + "kl": 0.0373382568359375, + "learning_rate": 3.336722502498882e-07, + "loss": 0.0062, + "num_tokens": 125548372.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.060377836227417, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02104416352202587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07002336472611187, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1199.0, + "completions/mean_length": 979.9375, + "completions/mean_terminated_length": 945.2667236328125, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.6941735433858465, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0110881573322925, + "kl": 0.01114654541015625, + "learning_rate": 3.333278911516809e-07, + "loss": 0.0626, + "num_tokens": 125578283.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0073083639144897, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018529969021978895, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026297683823814737, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1333.0625, + "completions/mean_terminated_length": 1257.181884765625, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.6944236059014753, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8953175128201343, + "kl": 0.0140228271484375, + "learning_rate": 3.3298369715041783e-07, + "loss": -0.001, + "num_tokens": 125635164.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0382225513458252, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.048797057879261654, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08461739779261816, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1117.0, + "completions/max_terminated_length": 1117.0, + "completions/mean_length": 881.0, + "completions/mean_terminated_length": 881.0, + "completions/min_length": 547.0, + "completions/min_terminated_length": 547.0, + "epoch": 0.6946736684171043, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.812804658750332, + "kl": 0.016998291015625, + "learning_rate": 3.3263966850836345e-07, + "loss": 0.0613, + "num_tokens": 125665676.0, + "reward": 0.0, + "reward_std": 0.9688605070114136, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03352507736445719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.071731665774105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1449.6875, + "completions/mean_terminated_length": 1385.0001220703125, + "completions/min_length": 1172.0, + "completions/min_terminated_length": 1172.0, + "epoch": 0.6949237309327332, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.429301376005436, + "kl": 0.0108795166015625, + "learning_rate": 3.322958054876559e-07, + "loss": -0.0007, + "num_tokens": 125717183.0, + "reward": 0.0, + "reward_std": 0.4678923487663269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10958104907597883, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15339367429752715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14446581038560777, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1372.5, + "completions/mean_terminated_length": 1245.0, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.695173793448362, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5345938647458586, + "kl": 0.01385498046875, + "learning_rate": 3.3195210835030744e-07, + "loss": -0.0053, + "num_tokens": 125777247.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0476266145706177, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11274051654271315, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1175942085686823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1339.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 832.375, + "completions/mean_terminated_length": 832.375, + "completions/min_length": 400.0, + "completions/min_terminated_length": 400.0, + "epoch": 0.695423855963991, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1676376881534085, + "kl": 0.015228271484375, + "learning_rate": 3.3160857735820395e-07, + "loss": -0.0337, + "num_tokens": 125806909.0, + "reward": 0.0, + "reward_std": 0.9770601987838745, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04733571876671394, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12375880888291244, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081414, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1356.625, + "completions/mean_terminated_length": 1291.45458984375, + "completions/min_length": 1094.0, + "completions/min_terminated_length": 1094.0, + "epoch": 0.6956739184796199, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.553439007946773, + "kl": 0.022735595703125, + "learning_rate": 3.3126521277310446e-07, + "loss": -0.037, + "num_tokens": 125873151.0, + "reward": 0.0, + "reward_std": 0.6315454840660095, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014192729784492892, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.040105424859123516, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1240.5625, + "completions/mean_terminated_length": 1223.2667236328125, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.6959239809952488, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0876849751219457, + "kl": 0.017333984375, + "learning_rate": 3.309220148566414e-07, + "loss": -0.0158, + "num_tokens": 125933288.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.068881869316101, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.001540104756725739, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13510972255244683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1019.9375, + "completions/mean_terminated_length": 1019.9375, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.6961740435108777, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.573074422570016, + "kl": 0.01824951171875, + "learning_rate": 3.3057898387032064e-07, + "loss": -0.0002, + "num_tokens": 125968631.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6363564729690552, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028585264753556116, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11000935857235036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1264.0625, + "completions/mean_terminated_length": 1248.3333740234375, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.6964241060265066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8964625067290166, + "kl": 0.016143798828125, + "learning_rate": 3.3023612007551974e-07, + "loss": 0.0048, + "num_tokens": 126024768.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.046691656112671, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04833489431980953, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05994569194558762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1071.375, + "completions/mean_terminated_length": 1071.375, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.6966741685421355, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.084025728306994, + "kl": 0.022857666015625, + "learning_rate": 3.2989342373348964e-07, + "loss": -0.0597, + "num_tokens": 126073502.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0106769800186157, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015092536123958708, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04561372092916137, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666115, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1095.0625, + "completions/mean_terminated_length": 1068.0667724609375, + "completions/min_length": 635.0, + "completions/min_terminated_length": 635.0, + "epoch": 0.6969242310577645, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1457251287386687, + "kl": 0.014556884765625, + "learning_rate": 3.2955089510535385e-07, + "loss": -0.0032, + "num_tokens": 126118447.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0665299892425537, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004911529997597195, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09008604632954832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1338.0, + "completions/max_terminated_length": 1338.0, + "completions/mean_length": 899.6875, + "completions/mean_terminated_length": 899.6875, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.6971742935733933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5785800060322797, + "kl": 0.0178985595703125, + "learning_rate": 3.2920853445210764e-07, + "loss": -0.0482, + "num_tokens": 126158178.0, + "reward": 0.0, + "reward_std": 0.6559244394302368, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0014482839923583308, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09039482120634089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1190.0, + "completions/mean_terminated_length": 1190.0, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.6974243560890222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4311101922709657, + "kl": 0.018646240234375, + "learning_rate": 3.2886634203461847e-07, + "loss": -0.0045, + "num_tokens": 126199810.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0293006896972656, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016401245115499875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09344670836286631, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1074.1875, + "completions/mean_terminated_length": 1045.800048828125, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.6976744186046512, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1693917804873952, + "kl": 0.0170135498046875, + "learning_rate": 3.285243181136256e-07, + "loss": 0.0132, + "num_tokens": 126260837.0, + "reward": 0.0, + "reward_std": 0.409209668636322, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.29097935157105637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3300960519375208, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1099.5, + "completions/mean_terminated_length": 1072.800048828125, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.69792448112028, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.386191443069996, + "kl": 0.0173187255859375, + "learning_rate": 3.281824629497397e-07, + "loss": -0.0349, + "num_tokens": 126301829.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0405802726745605, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027780351744568745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06077754492642986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1359.875, + "completions/mean_terminated_length": 1179.71435546875, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.698174543635909, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.220786359435858, + "kl": 0.0120697021484375, + "learning_rate": 3.278407768034433e-07, + "loss": -0.0203, + "num_tokens": 126359283.0, + "reward": 0.0, + "reward_std": 0.762126624584198, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028253952427683398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12063641102727553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1363.0625, + "completions/mean_terminated_length": 1280.9000244140625, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.6984246061515379, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.038900151946407, + "kl": 0.0191650390625, + "learning_rate": 3.274992599350896e-07, + "loss": 0.0555, + "num_tokens": 126411468.0, + "reward": 0.0, + "reward_std": 0.7166562080383301, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08475871319692715, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2355856428603501, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1070.4375, + "completions/mean_terminated_length": 1070.4375, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.6986746686671668, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6828223712622257, + "kl": 0.01947021484375, + "learning_rate": 3.2715791260490347e-07, + "loss": 0.044, + "num_tokens": 126444171.0, + "reward": 0.0, + "reward_std": 0.7783265709877014, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03517000844021991, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06579234281882383, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1202.5, + "completions/mean_terminated_length": 1182.666748046875, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.6989247311827957, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4463813885324925, + "kl": 0.01508331298828125, + "learning_rate": 3.2681673507298e-07, + "loss": -0.0048, + "num_tokens": 126482875.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0639290809631348, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.052768872815549255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056346737258437536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988656986, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1013.75, + "completions/mean_terminated_length": 1013.75, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.6991747936984246, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3228094453964734, + "kl": 0.0198211669921875, + "learning_rate": 3.2647572759928535e-07, + "loss": 0.0071, + "num_tokens": 126513807.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7020447254180908, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040413959065309375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046532281917021696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1111.0, + "completions/max_terminated_length": 1111.0, + "completions/mean_length": 913.6875, + "completions/mean_terminated_length": 913.6875, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.6994248562140535, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2950842773644053, + "kl": 0.0194244384765625, + "learning_rate": 3.2613489044365534e-07, + "loss": -0.0364, + "num_tokens": 126553858.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0352706909179688, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009918698994693031, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10778062658436575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1256.125, + "completions/mean_terminated_length": 1221.2857666015625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.6996749187296825, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3292394528034532, + "kl": 0.0120086669921875, + "learning_rate": 3.2579422386579703e-07, + "loss": 0.0065, + "num_tokens": 126599892.0, + "reward": 0.0, + "reward_std": 0.8008177280426025, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009836749018523185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050225755641416986, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 742.0, + "completions/mean_length": 1060.375, + "completions/mean_terminated_length": 718.4444580078125, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.6999249812453113, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.004491941952971, + "kl": 0.00984954833984375, + "learning_rate": 3.2545372812528694e-07, + "loss": -0.0333, + "num_tokens": 126643770.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8603571653366089, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10961170774237097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0823761467614098, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1156.6875, + "completions/mean_terminated_length": 1077.4615478515625, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.7001750437609402, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1266865877551884, + "kl": 0.020721435546875, + "learning_rate": 3.2511340348157136e-07, + "loss": -0.1062, + "num_tokens": 126676773.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8452605605125427, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009209964391201808, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05432240916019421, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1360.75, + "completions/mean_terminated_length": 1128.666748046875, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.7004251062765692, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6811749036000467, + "kl": 0.0145111083984375, + "learning_rate": 3.247732501939666e-07, + "loss": 0.008, + "num_tokens": 126740529.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9345215559005737, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15825513577623412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17819620916118808, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1519624710005487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1339.5, + "completions/mean_terminated_length": 1286.0, + "completions/min_length": 1138.0, + "completions/min_terminated_length": 1138.0, + "epoch": 0.700675168792198, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8905299162613924, + "kl": 0.0135955810546875, + "learning_rate": 3.2443326852165815e-07, + "loss": -0.0136, + "num_tokens": 126801793.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0236505270004272, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009389369248447027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07385243398323543, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1186.4375, + "completions/mean_terminated_length": 1165.533447265625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.7009252313078269, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.471916654453378, + "kl": 0.017608642578125, + "learning_rate": 3.2409345872370043e-07, + "loss": -0.0459, + "num_tokens": 126854936.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0113085508346558, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10857796489081897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1130229558040126, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 1198.25, + "completions/mean_terminated_length": 896.5, + "completions/min_length": 589.0, + "completions/min_terminated_length": 589.0, + "epoch": 0.7011752938234559, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1554948897442254, + "kl": 0.0140228271484375, + "learning_rate": 3.237538210590173e-07, + "loss": -0.0573, + "num_tokens": 126912916.0, + "reward": 0.0, + "reward_std": 0.9904120564460754, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0363909853012259, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1066091991172613, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 878.125, + "completions/mean_terminated_length": 878.125, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.7014253563390848, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7679962318444704, + "kl": 0.01751708984375, + "learning_rate": 3.234143557864015e-07, + "loss": -0.003, + "num_tokens": 126938702.0, + "reward": 0.0, + "reward_std": 0.4741680324077606, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02154207457215262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03896476694936814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6416666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1174.6875, + "completions/mean_terminated_length": 1153.0, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.7016754188547136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2039776104908375, + "kl": 0.01531982421875, + "learning_rate": 3.230750631645144e-07, + "loss": -0.0132, + "num_tokens": 126977441.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8342218399047852, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09775501122296308, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2206585671828784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1324.9375, + "completions/mean_terminated_length": 1188.77783203125, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.7019254813703426, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8682190419385813, + "kl": 0.0141754150390625, + "learning_rate": 3.227359434518855e-07, + "loss": -0.0356, + "num_tokens": 127037472.0, + "reward": 0.0, + "reward_std": 0.9605675935745239, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0007623020977443069, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08270715779873118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1218.875, + "completions/mean_terminated_length": 1200.1334228515625, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.7021755438859715, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0261967084172556, + "kl": 0.0306396484375, + "learning_rate": 3.2239699690691276e-07, + "loss": 0.026, + "num_tokens": 127080886.0, + "reward": 0.0, + "reward_std": 0.9584397077560425, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08485801990852784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06847647769064472, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1383.75, + "completions/mean_terminated_length": 1293.3333740234375, + "completions/min_length": 1094.0, + "completions/min_terminated_length": 1094.0, + "epoch": 0.7024256064016005, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8561578398034917, + "kl": 0.0152740478515625, + "learning_rate": 3.2205822378786216e-07, + "loss": -0.0075, + "num_tokens": 127137770.0, + "reward": 0.0, + "reward_std": 0.8603485822677612, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07511486089806617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09000990587659076, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1208.8125, + "completions/mean_terminated_length": 1208.8125, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.7026756689172293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.41366360809989, + "kl": 0.01910400390625, + "learning_rate": 3.217196243528677e-07, + "loss": -0.0453, + "num_tokens": 127181055.0, + "reward": 0.0, + "reward_std": 0.9326069951057434, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045562992396428324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08054308951153483, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1262.375, + "completions/mean_terminated_length": 1228.4285888671875, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.7029257314328582, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9403307778077292, + "kl": 0.02288818359375, + "learning_rate": 3.213811988599306e-07, + "loss": 0.0088, + "num_tokens": 127224405.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9466946125030518, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03272729559830909, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04405571863389025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1293.5, + "completions/mean_terminated_length": 1279.7333984375, + "completions/min_length": 1075.0, + "completions/min_terminated_length": 1075.0, + "epoch": 0.7031757939484872, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.905467458112592, + "kl": 0.01434326171875, + "learning_rate": 3.210429475669202e-07, + "loss": 0.0018, + "num_tokens": 127280805.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.93485426902771, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06380356648365372, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15950221716796273, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1217.25, + "completions/mean_terminated_length": 1088.727294921875, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.703425856464116, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.858984482927724, + "kl": 0.0146026611328125, + "learning_rate": 3.207048707315726e-07, + "loss": -0.0268, + "num_tokens": 127326441.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9203657507896423, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045135005412286815, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0979020063973524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17363222093882272, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1208.0, + "completions/mean_terminated_length": 1166.2857666015625, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.7036759189797449, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.443995310603785, + "kl": 0.01763916015625, + "learning_rate": 3.2036696861149105e-07, + "loss": -0.0607, + "num_tokens": 127376473.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0450952053070068, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0009601389816866479, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02861066067256892, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1369.0625, + "completions/mean_terminated_length": 1238.125, + "completions/min_length": 905.0, + "completions/min_terminated_length": 905.0, + "epoch": 0.7039259814953739, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6182684683171824, + "kl": 0.0137481689453125, + "learning_rate": 3.2002924146414577e-07, + "loss": 0.0096, + "num_tokens": 127413410.0, + "reward": 0.0, + "reward_std": 0.980578601360321, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0548600143591793, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07717201619503711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1322.75, + "completions/mean_terminated_length": 1145.5, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.7041760440110028, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8088111810326417, + "kl": 0.016143798828125, + "learning_rate": 3.196916895468737e-07, + "loss": 0.0239, + "num_tokens": 127456678.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8607258796691895, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07183142916528525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14082449317880263, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18851269728333164, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1055.0, + "completions/max_terminated_length": 1055.0, + "completions/mean_length": 902.75, + "completions/mean_terminated_length": 902.75, + "completions/min_length": 564.0, + "completions/min_terminated_length": 564.0, + "epoch": 0.7044261065266316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2356400557803755, + "kl": 0.016265869140625, + "learning_rate": 3.19354313116878e-07, + "loss": -0.0442, + "num_tokens": 127482650.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7683053612709045, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02835051166381757, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06860806054224657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1127.25, + "completions/mean_terminated_length": 1127.25, + "completions/min_length": 882.0, + "completions/min_terminated_length": 882.0, + "epoch": 0.7046761690422606, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.621735381097077, + "kl": 0.020751953125, + "learning_rate": 3.1901711243122825e-07, + "loss": -0.049, + "num_tokens": 127529294.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9423117637634277, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.048535357640342065, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05284811356757264, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1177.625, + "completions/mean_terminated_length": 1156.1334228515625, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.7049262315578895, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5131388573258198, + "kl": 0.011810302734375, + "learning_rate": 3.186800877468606e-07, + "loss": 0.0034, + "num_tokens": 127567448.0, + "reward": 0.0, + "reward_std": 0.7663891315460205, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004110387815094651, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055353743252269764, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820634, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1136.25, + "completions/mean_terminated_length": 1112.0, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.7051762940735183, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4988985634435683, + "kl": 0.01959228515625, + "learning_rate": 3.1834323932057627e-07, + "loss": 0.0329, + "num_tokens": 127607380.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0337870121002197, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047399877133983866, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08902919693358946, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1187.0, + "completions/max_terminated_length": 1187.0, + "completions/mean_length": 942.125, + "completions/mean_terminated_length": 942.125, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.7054263565891473, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7145888975589236, + "kl": 0.020294189453125, + "learning_rate": 3.1800656740904255e-07, + "loss": -0.0439, + "num_tokens": 127658030.0, + "reward": 0.0, + "reward_std": 0.516117513179779, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07759731254586902, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07662963683623444, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1214.0, + "completions/max_terminated_length": 1214.0, + "completions/mean_length": 901.5625, + "completions/mean_terminated_length": 901.5625, + "completions/min_length": 749.0, + "completions/min_terminated_length": 749.0, + "epoch": 0.7056764191047762, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0404422899449055, + "kl": 0.00748443603515625, + "learning_rate": 3.176700722687924e-07, + "loss": -0.0713, + "num_tokens": 127698399.0, + "reward": 0.0, + "reward_std": 1.017988920211792, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03323297160726132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08151653533314063, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1279.0, + "completions/mean_terminated_length": 1228.0, + "completions/min_length": 938.0, + "completions/min_terminated_length": 938.0, + "epoch": 0.705926481620405, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.736141939374089, + "kl": 0.0152740478515625, + "learning_rate": 3.1733375415622385e-07, + "loss": 0.0026, + "num_tokens": 127752831.0, + "reward": 0.0, + "reward_std": 0.3581562638282776, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03645460190861649, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07694283244028366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087684, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1241.375, + "completions/mean_terminated_length": 1224.1334228515625, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.706176544136034, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0935435981149744, + "kl": 0.0147247314453125, + "learning_rate": 3.169976133276002e-07, + "loss": -0.0075, + "num_tokens": 127795877.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9301435947418213, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006198403656808819, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0805037845104681, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1348.125, + "completions/mean_terminated_length": 1313.0770263671875, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.7064266066516629, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2133996886637624, + "kl": 0.0174102783203125, + "learning_rate": 3.166616500390495e-07, + "loss": -0.0194, + "num_tokens": 127845455.0, + "reward": -2.421438694000244e-08, + "reward_std": 0.9298534393310547, + "rewards/wordcountpos_reward_GEOBench/mean": -2.421438694000244e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0068683983215388445, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027473593286155378, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1177.8125, + "completions/mean_terminated_length": 1156.3333740234375, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.7066766691672918, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0923754169082502, + "kl": 0.03826904296875, + "learning_rate": 3.1632586454656474e-07, + "loss": -0.0092, + "num_tokens": 127889316.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6927551031112671, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04256880561422295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06661776467425781, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965646, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1276.875, + "completions/mean_terminated_length": 1143.0, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.7069267316829208, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1583819495684153, + "kl": 0.01837158203125, + "learning_rate": 3.1599025710600315e-07, + "loss": 0.0379, + "num_tokens": 127938490.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0038414001464844, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14104329116835007, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06597844585006132, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1435.0, + "completions/mean_terminated_length": 1240.0, + "completions/min_length": 1101.0, + "completions/min_terminated_length": 1101.0, + "epoch": 0.7071767941985496, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.816913645883382, + "kl": 0.0165252685546875, + "learning_rate": 3.156548279730864e-07, + "loss": 0.0401, + "num_tokens": 127997770.0, + "reward": 0.0, + "reward_std": 0.9770745038986206, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04148151122515283, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07754122021778037, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17126976771553507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1247.25, + "completions/mean_terminated_length": 1247.25, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.7074268567141786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5170313055864435, + "kl": 0.0190887451171875, + "learning_rate": 3.1531957740340055e-07, + "loss": -0.0209, + "num_tokens": 128042694.0, + "reward": 0.0, + "reward_std": 0.8562936782836914, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14809021280795565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07704305901496727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1090.0, + "completions/max_terminated_length": 1090.0, + "completions/mean_length": 848.625, + "completions/mean_terminated_length": 848.625, + "completions/min_length": 617.0, + "completions/min_terminated_length": 617.0, + "epoch": 0.7076769192298075, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.84804560521609, + "kl": 0.023040771484375, + "learning_rate": 3.149845056523953e-07, + "loss": -0.0436, + "num_tokens": 128070936.0, + "reward": 0.0, + "reward_std": 1.026524543762207, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10190822199231575, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1801392432597578, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1105.625, + "completions/mean_terminated_length": 1105.625, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.7079269817454363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.390975504031947, + "kl": 0.015838623046875, + "learning_rate": 3.1464961297538416e-07, + "loss": -0.0519, + "num_tokens": 128108258.0, + "reward": 0.0, + "reward_std": 1.0321160554885864, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08490534203226935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10775788808527118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1203.625, + "completions/mean_terminated_length": 1161.2857666015625, + "completions/min_length": 853.0, + "completions/min_terminated_length": 853.0, + "epoch": 0.7081770442610653, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4170030910054034, + "kl": 0.01666259765625, + "learning_rate": 3.1431489962754416e-07, + "loss": -0.0704, + "num_tokens": 128159204.0, + "reward": 0.0, + "reward_std": 0.8056381940841675, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03886862933752821, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05105545301292194, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1273.4375, + "completions/mean_terminated_length": 1258.3333740234375, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.7084271067766942, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.693744341114992, + "kl": 0.0153045654296875, + "learning_rate": 3.1398036586391586e-07, + "loss": -0.0206, + "num_tokens": 128213419.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7115117311477661, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044466170826225025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08797872289491358, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1356.0, + "completions/mean_length": 1331.9375, + "completions/mean_terminated_length": 1163.875, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.708677169292323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.134947169737172, + "kl": 0.0185546875, + "learning_rate": 3.1364601193940283e-07, + "loss": 0.0043, + "num_tokens": 128266618.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6875914335250854, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02190851117654648, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17516680166594992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1140987226857449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1325.9375, + "completions/mean_terminated_length": 1221.5, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.708927231807952, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3858431564913847, + "kl": 0.01873779296875, + "learning_rate": 3.133118381087714e-07, + "loss": 0.0145, + "num_tokens": 128315233.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9801381826400757, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.054459978026832716, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12038459450817698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1296.25, + "completions/mean_terminated_length": 1267.1429443359375, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.7091772943235809, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.587555574285808, + "kl": 0.0164947509765625, + "learning_rate": 3.129778446266514e-07, + "loss": 0.0037, + "num_tokens": 128361005.0, + "reward": 0.0, + "reward_std": 0.6894181370735168, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07003308592279786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17402592206554504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1095.25, + "completions/mean_terminated_length": 960.3333740234375, + "completions/min_length": 470.0, + "completions/min_terminated_length": 470.0, + "epoch": 0.7094273568392098, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.195410330160564, + "kl": 0.017425537109375, + "learning_rate": 3.126440317475346e-07, + "loss": -0.0686, + "num_tokens": 128406577.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.985083818435669, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19307501303529598, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.34374238318938916, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 949.625, + "completions/mean_terminated_length": 949.625, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.7096774193548387, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7833880757165503, + "kl": 0.009368896484375, + "learning_rate": 3.123103997257751e-07, + "loss": 0.0339, + "num_tokens": 128433083.0, + "reward": 0.0, + "reward_std": 1.0001343488693237, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015972557685866983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12440294966203323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1420.5625, + "completions/mean_terminated_length": 1358.77783203125, + "completions/min_length": 1225.0, + "completions/min_terminated_length": 1225.0, + "epoch": 0.7099274818704676, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.62060095551107, + "kl": 0.0165557861328125, + "learning_rate": 3.119769488155894e-07, + "loss": -0.011, + "num_tokens": 128493724.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0624451637268066, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0014870843464282763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15157448771172113, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15939701191492708, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1061.75, + "completions/mean_terminated_length": 1061.75, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.7101775443860965, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9521165301148486, + "kl": 0.02178955078125, + "learning_rate": 3.1164367927105627e-07, + "loss": 0.0044, + "num_tokens": 128526472.0, + "reward": 0.0, + "reward_std": 0.954200029373169, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03960488299116578, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06142987772815478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1346.3125, + "completions/mean_terminated_length": 1226.77783203125, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.7104276069017255, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.621846835209427, + "kl": 0.0143585205078125, + "learning_rate": 3.113105913461159e-07, + "loss": -0.0108, + "num_tokens": 128589573.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.46048831939697266, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02293630545147955, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0472503065931569, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.675, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1094.0, + "completions/max_terminated_length": 1094.0, + "completions/mean_length": 907.5, + "completions/mean_terminated_length": 907.5, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.7106776694173543, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6354429661178846, + "kl": 0.0249786376953125, + "learning_rate": 3.1097768529457023e-07, + "loss": -0.0179, + "num_tokens": 128635717.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9319056868553162, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07932494866800577, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09588941122695566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1201.4375, + "completions/mean_terminated_length": 1158.7857666015625, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.7109277319329832, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.508032108387473, + "kl": 0.018280029296875, + "learning_rate": 3.106449613700826e-07, + "loss": -0.0549, + "num_tokens": 128682508.0, + "reward": 0.0, + "reward_std": 0.5846898555755615, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05828445015407144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1527733197482924, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17384539747207065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 942.0, + "completions/mean_terminated_length": 942.0, + "completions/min_length": 581.0, + "completions/min_terminated_length": 581.0, + "epoch": 0.7111777944486122, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9140966141699987, + "kl": 0.0187835693359375, + "learning_rate": 3.103124198261776e-07, + "loss": -0.0553, + "num_tokens": 128723228.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9195671677589417, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009171326935253948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03668530774101579, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1434.625, + "completions/mean_terminated_length": 1369.25, + "completions/min_length": 1301.0, + "completions/min_terminated_length": 1301.0, + "epoch": 0.711427856964241, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4854824896223224, + "kl": 0.0127410888671875, + "learning_rate": 3.0998006091624076e-07, + "loss": 0.0108, + "num_tokens": 128778390.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7481124997138977, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021975749151551857, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02919811469637233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 838.0, + "completions/mean_length": 1058.375, + "completions/mean_terminated_length": 616.75, + "completions/min_length": 335.0, + "completions/min_terminated_length": 335.0, + "epoch": 0.7116779194798699, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.716623416116362, + "kl": 0.0117340087890625, + "learning_rate": 3.0964788489351835e-07, + "loss": 0.0407, + "num_tokens": 128838764.0, + "reward": 0.0, + "reward_std": 0.8652939796447754, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007658235372489792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04264521985564836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1097.4375, + "completions/mean_terminated_length": 1039.9285888671875, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.7119279819954989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5380207299596167, + "kl": 0.0163116455078125, + "learning_rate": 3.0931589201111783e-07, + "loss": 0.0389, + "num_tokens": 128884835.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9652460813522339, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0196081261250174, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.024773211478033828, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1083.0, + "completions/max_terminated_length": 1083.0, + "completions/mean_length": 953.4375, + "completions/mean_terminated_length": 953.4375, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.7121780445111278, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.146065304206294, + "kl": 0.01116943359375, + "learning_rate": 3.089840825220067e-07, + "loss": -0.0108, + "num_tokens": 128924490.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9567921161651611, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015235263754240981, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10325565248278189, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.077817450199525, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1017.0625, + "completions/mean_terminated_length": 1017.0625, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.7124281070267567, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5636359552403096, + "kl": 0.02392578125, + "learning_rate": 3.086524566790126e-07, + "loss": 0.0027, + "num_tokens": 128968211.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0330629348754883, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00862586904737629, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04194967248170359, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1278.875, + "completions/mean_terminated_length": 1205.166748046875, + "completions/min_length": 1026.0, + "completions/min_terminated_length": 1026.0, + "epoch": 0.7126781695423856, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3176405450809745, + "kl": 0.0121002197265625, + "learning_rate": 3.0832101473482334e-07, + "loss": -0.0279, + "num_tokens": 129017393.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6707543730735779, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17748617418101156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2754451446241478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05947299418254506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1305.8125, + "completions/mean_terminated_length": 1217.5455322265625, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.7129282320580145, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8980636511991262, + "kl": 0.018707275390625, + "learning_rate": 3.079897569419869e-07, + "loss": -0.0114, + "num_tokens": 129063574.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0679320096969604, + "rewards/wordcountpos_reward_GEOBench/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1011.8125, + "completions/mean_terminated_length": 979.2667236328125, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.7131782945736435, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7758538489051183, + "kl": 0.0128021240234375, + "learning_rate": 3.076586835529105e-07, + "loss": 0.0254, + "num_tokens": 129116243.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8548996448516846, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.062237110364416634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.308455003020443, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1176.375, + "completions/mean_terminated_length": 1130.1429443359375, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.7134283570892723, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8266578738010133, + "kl": 0.0121612548828125, + "learning_rate": 3.0732779481986096e-07, + "loss": -0.0092, + "num_tokens": 129160169.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.04571533203125, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005066393496709105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05835593311653164, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1346.6875, + "completions/mean_terminated_length": 1193.375, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.7136784196049012, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1413799404496907, + "kl": 0.009063720703125, + "learning_rate": 3.069970909949647e-07, + "loss": -0.0035, + "num_tokens": 129202908.0, + "reward": 0.0, + "reward_std": 0.6547142267227173, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06716975371495072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17638483605436467, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857662, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1164.375, + "completions/mean_terminated_length": 1086.923095703125, + "completions/min_length": 599.0, + "completions/min_terminated_length": 599.0, + "epoch": 0.7139284821205302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2709960055443386, + "kl": 0.0194091796875, + "learning_rate": 3.0666657233020736e-07, + "loss": -0.0532, + "num_tokens": 129243706.0, + "reward": 0.0, + "reward_std": 0.508046567440033, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05785033095050428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08904023921513686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1173.125, + "completions/mean_terminated_length": 1126.4285888671875, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.714178544636159, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.314165084181871, + "kl": 0.019500732421875, + "learning_rate": 3.063362390774324e-07, + "loss": 0.0096, + "num_tokens": 129292524.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.874286413192749, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16285507414289035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2825955148589591, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1391.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1078.0, + "completions/mean_terminated_length": 1078.0, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.7144286071517879, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1675330748013315, + "kl": 0.01751708984375, + "learning_rate": 3.060060914883433e-07, + "loss": 0.066, + "num_tokens": 129337220.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.49305588006973267, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05665637891247896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1027117684230829, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1241.8125, + "completions/mean_terminated_length": 1224.60009765625, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.7146786696674169, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.937006700086138, + "kl": 0.015411376953125, + "learning_rate": 3.0567612981450165e-07, + "loss": -0.0166, + "num_tokens": 129390289.0, + "reward": 0.0, + "reward_std": 1.0208829641342163, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03816755052186195, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028086048767440173, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1178.875, + "completions/mean_terminated_length": 1133.0, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.7149287321830458, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1403490448936506, + "kl": 0.011932373046875, + "learning_rate": 3.0534635430732687e-07, + "loss": -0.0036, + "num_tokens": 129432895.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9647536277770996, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0023177930172625354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06985864592886433, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657014, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1463.0, + "completions/mean_terminated_length": 1401.3333740234375, + "completions/min_length": 1299.0, + "completions/min_terminated_length": 1299.0, + "epoch": 0.7151787946986746, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.4386060478763625, + "kl": 0.0081024169921875, + "learning_rate": 3.050167652180972e-07, + "loss": -0.001, + "num_tokens": 129479687.0, + "reward": 0.0, + "reward_std": 0.7827284336090088, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16686735711314993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3058465184137063, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1359.25, + "completions/mean_terminated_length": 1218.5, + "completions/min_length": 1119.0, + "completions/min_terminated_length": 1119.0, + "epoch": 0.7154288572143036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5144643816528665, + "kl": 0.0135345458984375, + "learning_rate": 3.0468736279794847e-07, + "loss": -0.0021, + "num_tokens": 129535051.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.47395020723342896, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07023311438788842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07035151633358651, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1055.25, + "completions/mean_terminated_length": 1025.60009765625, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.7156789197299325, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7404281090937137, + "kl": 0.0104827880859375, + "learning_rate": 3.0435814729787444e-07, + "loss": 0.0232, + "num_tokens": 129565671.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.009095311164856, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03273791439608814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04869108557429704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1134.9375, + "completions/mean_terminated_length": 1110.60009765625, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.7159289822455613, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.976514901132297, + "kl": 0.01904296875, + "learning_rate": 3.040291189687262e-07, + "loss": -0.0317, + "num_tokens": 129611950.0, + "reward": 0.0, + "reward_std": 0.7420212030410767, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2280731132510605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14118568799151607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1132.8125, + "completions/mean_terminated_length": 1108.3333740234375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.7161790447611903, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.066265585377172, + "kl": 0.018646240234375, + "learning_rate": 3.037002780612126e-07, + "loss": 0.0015, + "num_tokens": 129660155.0, + "reward": 0.0, + "reward_std": 0.5819937586784363, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011349833027881465, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14797441856786328, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1796601730428249, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1376.4375, + "completions/mean_terminated_length": 1252.875, + "completions/min_length": 1129.0, + "completions/min_terminated_length": 1129.0, + "epoch": 0.7164291072768192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1514373026687417, + "kl": 0.019012451171875, + "learning_rate": 3.033716248258995e-07, + "loss": 0.0201, + "num_tokens": 129710338.0, + "reward": 0.0, + "reward_std": 0.5303640365600586, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01425152387012261, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11989056198817129, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1198.125, + "completions/mean_terminated_length": 1155.0, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.7166791697924482, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.619127919695953, + "kl": 0.0179443359375, + "learning_rate": 3.030431595132097e-07, + "loss": -0.0086, + "num_tokens": 129758604.0, + "reward": 0.0, + "reward_std": 0.40741074085235596, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.44117448668399845, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.5001871231522923, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1302.0, + "completions/max_terminated_length": 1302.0, + "completions/mean_length": 996.5625, + "completions/mean_terminated_length": 996.5625, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.716929232308077, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.921571379050507, + "kl": 0.0181427001953125, + "learning_rate": 3.027148823734227e-07, + "loss": -0.0701, + "num_tokens": 129799813.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0395245552062988, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018293102464738677, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11035964946322054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1330.25, + "completions/mean_terminated_length": 1306.0, + "completions/min_length": 1125.0, + "completions/min_terminated_length": 1125.0, + "epoch": 0.7171792948237059, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3540180688762895, + "kl": 0.0162200927734375, + "learning_rate": 3.0238679365667495e-07, + "loss": 0.01, + "num_tokens": 129855873.0, + "reward": 0.0, + "reward_std": 0.84827721118927, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034604518442310786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04557114133030653, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 1147.1875, + "completions/mean_terminated_length": 1096.7857666015625, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.7174293573393349, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.45651344548211, + "kl": 0.0186767578125, + "learning_rate": 3.0205889361295905e-07, + "loss": -0.0746, + "num_tokens": 129898612.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9975383281707764, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03758951363398875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05706354494455574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1076.6875, + "completions/mean_terminated_length": 1016.21435546875, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.7176794198549638, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.852584656271365, + "kl": 0.019561767578125, + "learning_rate": 3.017311824921238e-07, + "loss": -0.0251, + "num_tokens": 129937607.0, + "reward": 0.0, + "reward_std": 0.9208687543869019, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.035500694973591304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07302541749111126, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386659, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1210.375, + "completions/mean_terminated_length": 1143.5384521484375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.7179294823705926, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.886702034875143, + "kl": 0.01580810546875, + "learning_rate": 3.0140366054387454e-07, + "loss": 0.0093, + "num_tokens": 129980061.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.068171501159668, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0004986382663574513, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.013235296417839808, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1256.5, + "completions/mean_terminated_length": 1145.8182373046875, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.7181795448862216, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5060945643142256, + "kl": 0.0153961181640625, + "learning_rate": 3.010763280177718e-07, + "loss": -0.0506, + "num_tokens": 130036973.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8256497383117676, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10459864493787685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06553607347718875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1213.3125, + "completions/mean_terminated_length": 1117.75, + "completions/min_length": 967.0, + "completions/min_terminated_length": 967.0, + "epoch": 0.7184296074018505, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.881234765938073, + "kl": 0.0183563232421875, + "learning_rate": 3.0074918516323246e-07, + "loss": 0.0252, + "num_tokens": 130079562.0, + "reward": 0.0, + "reward_std": 0.6382900476455688, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11918011167875858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18370176613241493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1380.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1136.5, + "completions/mean_terminated_length": 1136.5, + "completions/min_length": 266.0, + "completions/min_terminated_length": 266.0, + "epoch": 0.7186796699174793, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5053210372241788, + "kl": 0.0179443359375, + "learning_rate": 3.004222322295279e-07, + "loss": -0.0728, + "num_tokens": 130123202.0, + "reward": 0.0, + "reward_std": 1.0137368440628052, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0470306497372535, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06281655948064226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1230.25, + "completions/mean_terminated_length": 1212.2667236328125, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.7189297324331083, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0005538679969144, + "kl": 0.0141143798828125, + "learning_rate": 3.0009546946578577e-07, + "loss": -0.0469, + "num_tokens": 130168678.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5707080364227295, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24414529995770978, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.35267090388212463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1245.875, + "completions/mean_terminated_length": 1161.166748046875, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.7191797949487372, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.363028612830261, + "kl": 0.0203857421875, + "learning_rate": 2.9976889712098845e-07, + "loss": 0.0257, + "num_tokens": 130224540.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4896252155303955, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15861944228410396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18462380008941318, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1018.0, + "completions/mean_terminated_length": 1018.0, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.719429857464366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8181700424107463, + "kl": 0.02301025390625, + "learning_rate": 2.994425154439729e-07, + "loss": -0.0013, + "num_tokens": 130261444.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9688830375671387, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.141878925953186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1040.1875, + "completions/mean_terminated_length": 1009.5333862304688, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.719679919979995, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2428428499094406, + "kl": 0.018035888671875, + "learning_rate": 2.9911632468343176e-07, + "loss": -0.0156, + "num_tokens": 130299047.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7399611473083496, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026634746720193215, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07193816544068096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14907119849998599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1103.625, + "completions/mean_terminated_length": 1077.2000732421875, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.7199299824956239, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.620167078113148, + "kl": 0.020782470703125, + "learning_rate": 2.987903250879109e-07, + "loss": -0.0632, + "num_tokens": 130339993.0, + "reward": 0.0, + "reward_std": 0.4417513906955719, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0325306528219554, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12615818565841577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1276.0, + "completions/mean_length": 1268.3125, + "completions/mean_terminated_length": 1036.625, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.7201800450112528, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4651390329174343, + "kl": 0.023681640625, + "learning_rate": 2.9846451690581154e-07, + "loss": -0.0408, + "num_tokens": 130390638.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9701282382011414, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0692170945648039, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05212964342777218, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087681, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1280.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 1114.5625, + "completions/mean_terminated_length": 1114.5625, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.7204301075268817, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7230450793795042, + "kl": 0.023193359375, + "learning_rate": 2.9813890038538827e-07, + "loss": 0.0085, + "num_tokens": 130431695.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0099595785140991, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02588224521235122, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07988324603198517, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1214.625, + "completions/mean_terminated_length": 1173.857177734375, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.7206801700425106, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.803434918202901, + "kl": 0.01784515380859375, + "learning_rate": 2.978134757747505e-07, + "loss": -0.0206, + "num_tokens": 130469553.0, + "reward": 0.0, + "reward_std": 0.9441902041435242, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008709250813966285, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028468782797304804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1254.0, + "completions/mean_length": 1077.875, + "completions/mean_terminated_length": 1049.7333984375, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.7209302325581395, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0548597535417805, + "kl": 0.01470947265625, + "learning_rate": 2.9748824332186085e-07, + "loss": -0.0207, + "num_tokens": 130507927.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0450563430786133, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09339186992389567, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12709483741905292, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1393.0, + "completions/max_terminated_length": 1393.0, + "completions/mean_length": 1022.0, + "completions/mean_terminated_length": 1022.0, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.7211802950737685, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7746488460639163, + "kl": 0.015249252319335938, + "learning_rate": 2.9716320327453547e-07, + "loss": 0.0149, + "num_tokens": 130547271.0, + "reward": 0.0, + "reward_std": 0.8900302648544312, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028178739954174022, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08145358604777966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15682025568335425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1239.0, + "completions/max_terminated_length": 1239.0, + "completions/mean_length": 1018.5, + "completions/mean_terminated_length": 1018.5, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.7214303575893973, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.546014647932345, + "kl": 0.0128021240234375, + "learning_rate": 2.9683835588044434e-07, + "loss": -0.0209, + "num_tokens": 130586295.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9011170268058777, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005493231656166252, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.151153149590969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0709720863229836, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1244.25, + "completions/mean_terminated_length": 1090.800048828125, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.7216804201050263, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9431495753989596, + "kl": 0.0188446044921875, + "learning_rate": 2.9651370138711013e-07, + "loss": 0.0301, + "num_tokens": 130632523.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0581213235855103, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04625991658349773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12953601118488153, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1216.375, + "completions/mean_terminated_length": 1197.4666748046875, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.7219304826206552, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1001414767977966, + "kl": 0.0147705078125, + "learning_rate": 2.961892400419089e-07, + "loss": -0.0066, + "num_tokens": 130679761.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.977541446685791, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14559708905639313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3184250386466958, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1143.875, + "completions/mean_terminated_length": 1093.0, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.722180545136284, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.632520846337987, + "kl": 0.03424072265625, + "learning_rate": 2.958649720920692e-07, + "loss": -0.0247, + "num_tokens": 130708431.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9697908163070679, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07247059784908612, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07449955887045641, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505421, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1372.875, + "completions/mean_terminated_length": 1245.75, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.722430607651913, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.915979739555592, + "kl": 0.0093841552734375, + "learning_rate": 2.955408977846727e-07, + "loss": -0.052, + "num_tokens": 130767869.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9975847601890564, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007190919845516175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04315169293367138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1346.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1141.875, + "completions/mean_terminated_length": 1141.875, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.7226806701675419, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.436711182854667, + "kl": 0.023651123046875, + "learning_rate": 2.952170173666534e-07, + "loss": -0.0585, + "num_tokens": 130815227.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.026441216468811, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11295260065619457, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11322816144729454, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05443310539518172, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1102.4375, + "completions/mean_terminated_length": 1075.933349609375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.7229307326831708, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1766188682367407, + "kl": 0.020660400390625, + "learning_rate": 2.9489333108479687e-07, + "loss": -0.0492, + "num_tokens": 130857754.0, + "reward": 0.0, + "reward_std": 0.5948337912559509, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04989741954502155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0982211031181673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1184.8125, + "completions/mean_terminated_length": 1184.8125, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.7231807951987997, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.926827412179615, + "kl": 0.014923095703125, + "learning_rate": 2.9456983918574164e-07, + "loss": -0.0083, + "num_tokens": 130900631.0, + "reward": 0.0, + "reward_std": 0.6953353881835938, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01933892824992779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03571772045176437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1048.6875, + "completions/mean_terminated_length": 1048.6875, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.7234308577144286, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8621434348527615, + "kl": 0.01873779296875, + "learning_rate": 2.942465419159778e-07, + "loss": -0.0473, + "num_tokens": 130956098.0, + "reward": 0.0, + "reward_std": 1.0547151565551758, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09594304524194346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11297336578679092, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1276.5625, + "completions/mean_terminated_length": 1244.6429443359375, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.7236809202300575, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9862551431596867, + "kl": 0.018310546875, + "learning_rate": 2.93923439521847e-07, + "loss": -0.0207, + "num_tokens": 131002771.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.045459508895874, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0942357453518152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06806822195495058, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 966.125, + "completions/mean_terminated_length": 723.45458984375, + "completions/min_length": 445.0, + "completions/min_terminated_length": 445.0, + "epoch": 0.7239309827456865, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3104011684904977, + "kl": 0.0100555419921875, + "learning_rate": 2.936005322495425e-07, + "loss": -0.071, + "num_tokens": 131050221.0, + "reward": 0.0, + "reward_std": 0.859643280506134, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043005991912449026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053990420905733416, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1458.625, + "completions/mean_terminated_length": 1405.4285888671875, + "completions/min_length": 1307.0, + "completions/min_terminated_length": 1307.0, + "epoch": 0.7241810452613153, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7762539071476398, + "kl": 0.01666259765625, + "learning_rate": 2.932778203451093e-07, + "loss": 0.0075, + "num_tokens": 131109767.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7752960920333862, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10705749066509114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10766069824982606, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1243.5, + "completions/mean_terminated_length": 1226.4000244140625, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.7244311077769442, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.584093756077554, + "kl": 0.0150146484375, + "learning_rate": 2.929553040544426e-07, + "loss": -0.0323, + "num_tokens": 131153223.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8050382137298584, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005653785355258287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08096410109693529, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1421.125, + "completions/mean_terminated_length": 1319.71435546875, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.7246811702925732, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.923794457065695, + "kl": 0.018463134765625, + "learning_rate": 2.9263298362328916e-07, + "loss": -0.0017, + "num_tokens": 131203137.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9002468585968018, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16050539462068886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17392085171378155, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1255.3125, + "completions/mean_terminated_length": 1220.357177734375, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.724931232808202, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9748421448485245, + "kl": 0.014739990234375, + "learning_rate": 2.923108592972467e-07, + "loss": 0.0084, + "num_tokens": 131252974.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9644142389297485, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08404540031727346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15878866035527428, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717428, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1280.5, + "completions/mean_terminated_length": 1265.86669921875, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.7251812953238309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.356695197428588, + "kl": 0.0218505859375, + "learning_rate": 2.919889313217633e-07, + "loss": 0.0025, + "num_tokens": 131300054.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0617353916168213, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05886381132018725, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10730510112538608, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1180.0, + "completions/mean_terminated_length": 1106.1539306640625, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.7254313578394599, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.602777930041237, + "kl": 0.021148681640625, + "learning_rate": 2.916671999421372e-07, + "loss": -0.0018, + "num_tokens": 131345358.0, + "reward": 0.0, + "reward_std": 0.5519723892211914, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24599926718919463, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.41670218614980054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.5958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.27911898431924587, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1194.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 877.0625, + "completions/mean_terminated_length": 877.0625, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.7256814203550888, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7251107187486814, + "kl": 0.017059326171875, + "learning_rate": 2.913456654035169e-07, + "loss": 0.0177, + "num_tokens": 131376663.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8915010690689087, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019584099867906407, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0513889526238444, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1105.0625, + "completions/mean_terminated_length": 1105.0625, + "completions/min_length": 586.0, + "completions/min_terminated_length": 586.0, + "epoch": 0.7259314828707176, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1460145139666698, + "kl": 0.0188446044921875, + "learning_rate": 2.9102432795090126e-07, + "loss": -0.0024, + "num_tokens": 131423336.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0537718534469604, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04832061768101551, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1237718882947551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1035.6875, + "completions/mean_terminated_length": 1035.6875, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.7261815453863466, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0638724383495086, + "kl": 0.015960693359375, + "learning_rate": 2.907031878291387e-07, + "loss": -0.0378, + "num_tokens": 131455675.0, + "reward": 0.0, + "reward_std": 0.98621666431427, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1029436984532923, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14043542787699834, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09259629622222519, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1345.4375, + "completions/mean_terminated_length": 1225.2222900390625, + "completions/min_length": 1088.0, + "completions/min_terminated_length": 1088.0, + "epoch": 0.7264316079019755, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.896472171182283, + "kl": 0.0170745849609375, + "learning_rate": 2.90382245282927e-07, + "loss": -0.0127, + "num_tokens": 131506002.0, + "reward": 0.0, + "reward_std": 0.8021248579025269, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05470021302367889, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10331621144228664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1183.4375, + "completions/mean_terminated_length": 1162.3333740234375, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.7266816704176045, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.278113872978817, + "kl": 0.018157958984375, + "learning_rate": 2.9006150055681413e-07, + "loss": 0.0047, + "num_tokens": 131558617.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.024271011352539, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06863146944700482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06713812757095666, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1118.4375, + "completions/mean_terminated_length": 945.0, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.7269317329332333, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.026403771722344, + "kl": 0.0153656005859375, + "learning_rate": 2.8974095389519674e-07, + "loss": 0.0016, + "num_tokens": 131612056.0, + "reward": 0.0, + "reward_std": 1.0306792259216309, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.123209271031714, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16783119088039136, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1376.0, + "completions/mean_terminated_length": 1319.6363525390625, + "completions/min_length": 1022.0, + "completions/min_terminated_length": 1022.0, + "epoch": 0.7271817954488622, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1588403738137223, + "kl": 0.0162811279296875, + "learning_rate": 2.89420605542321e-07, + "loss": -0.0412, + "num_tokens": 131658648.0, + "reward": 0.0, + "reward_std": 0.7288148403167725, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11669356048896407, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12519350145874458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1140.875, + "completions/mean_terminated_length": 1140.875, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.7274318579644912, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.012230238061764, + "kl": 0.019073486328125, + "learning_rate": 2.89100455742281e-07, + "loss": -0.0224, + "num_tokens": 131698294.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7730937600135803, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03437326449707235, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04564675362751511, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1475.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1131.125, + "completions/mean_terminated_length": 1131.125, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.72768192048012, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.300278535506199, + "kl": 0.0177459716796875, + "learning_rate": 2.8878050473902094e-07, + "loss": 0.0517, + "num_tokens": 131731120.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0107311010360718, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006229483203495086, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026999344274101066, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1327.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1126.3125, + "completions/mean_terminated_length": 1126.3125, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.7279319829957489, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0089477379277634, + "kl": 0.01111602783203125, + "learning_rate": 2.8846075277633254e-07, + "loss": -0.012, + "num_tokens": 131776637.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9039648771286011, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012006918519844204, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0793937065321064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11147163731607214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1330.3125, + "completions/mean_terminated_length": 1291.1539306640625, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.7281820455113779, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2441617968285663, + "kl": 0.014190673828125, + "learning_rate": 2.88141200097856e-07, + "loss": 0.0139, + "num_tokens": 131821610.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0588923692703247, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06952159281041477, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0545472484097678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1211.0, + "completions/mean_length": 1234.4375, + "completions/mean_terminated_length": 1027.888916015625, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.7284321080270068, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2990323923410654, + "kl": 0.0185394287109375, + "learning_rate": 2.878218469470803e-07, + "loss": -0.0063, + "num_tokens": 131875177.0, + "reward": 0.0, + "reward_std": 0.5370532870292664, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17213259316477408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1286.5625, + "completions/mean_terminated_length": 1237.3077392578125, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.7286821705426356, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2266283281378554, + "kl": 0.0196533203125, + "learning_rate": 2.8750269356734185e-07, + "loss": -0.0242, + "num_tokens": 131930146.0, + "reward": 0.0, + "reward_std": 0.9718354940414429, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01966010535608184, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11000611234409366, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1397749513934347, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1206.625, + "completions/mean_terminated_length": 1187.0667724609375, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.7289322330582646, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.407436569289197, + "kl": 0.021942138671875, + "learning_rate": 2.871837402018246e-07, + "loss": -0.0022, + "num_tokens": 131979812.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9700183868408203, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06291781535136695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08995957549480557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686705, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1071.0, + "completions/mean_terminated_length": 737.3333129882812, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.7291822955738935, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3040738749702077, + "kl": 0.0184783935546875, + "learning_rate": 2.8686498709356045e-07, + "loss": -0.0463, + "num_tokens": 132035180.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9744203090667725, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02255559049515625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09472133382370652, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1156.6875, + "completions/mean_terminated_length": 1156.6875, + "completions/min_length": 404.0, + "completions/min_terminated_length": 404.0, + "epoch": 0.7294323580895223, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1243551031243344, + "kl": 0.00792694091796875, + "learning_rate": 2.8654643448542883e-07, + "loss": -0.0642, + "num_tokens": 132089759.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0432828664779663, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14231570965261495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1494439824862522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15770342536029575, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1325.5, + "completions/mean_terminated_length": 1313.86669921875, + "completions/min_length": 1004.0, + "completions/min_terminated_length": 1004.0, + "epoch": 0.7296824206051513, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.47119326487972, + "kl": 0.011871337890625, + "learning_rate": 2.862280826201563e-07, + "loss": -0.0081, + "num_tokens": 132135039.0, + "reward": 0.0, + "reward_std": 0.9603132009506226, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045932085053917264, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10783138647324063, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1161.0, + "completions/max_terminated_length": 1161.0, + "completions/mean_length": 931.9375, + "completions/mean_terminated_length": 931.9375, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.7299324831207802, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.856049491198846, + "kl": 0.026336669921875, + "learning_rate": 2.859099317403162e-07, + "loss": -0.0574, + "num_tokens": 132171758.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0142403841018677, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05693070211430336, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08931222964705784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1156.875, + "completions/mean_terminated_length": 1107.857177734375, + "completions/min_length": 797.0, + "completions/min_terminated_length": 797.0, + "epoch": 0.730182545636409, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0881107312052123, + "kl": 0.019378662109375, + "learning_rate": 2.85591982088329e-07, + "loss": 0.0394, + "num_tokens": 132222772.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7540292739868164, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026685092899204267, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06848331274638993, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 966.5, + "completions/mean_terminated_length": 966.5, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.730432608152038, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2839948170883964, + "kl": 0.0197601318359375, + "learning_rate": 2.852742339064619e-07, + "loss": -0.0091, + "num_tokens": 132259412.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.760249137878418, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04713870798128664, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09989167444181896, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1059.125, + "completions/mean_terminated_length": 1059.125, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.7306826706676669, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1294533662131507, + "kl": 0.014068603515625, + "learning_rate": 2.8495668743682826e-07, + "loss": 0.0086, + "num_tokens": 132287966.0, + "reward": 0.0, + "reward_std": 0.6241884231567383, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0007716444316016847, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05364877346049243, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1159.4375, + "completions/mean_terminated_length": 1136.7333984375, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.7309327331832958, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4712561347164503, + "kl": 0.0098724365234375, + "learning_rate": 2.846393429213878e-07, + "loss": -0.0075, + "num_tokens": 132320133.0, + "reward": 0.0, + "reward_std": 0.6292197704315186, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039834961178606694, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08412786424784359, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10036968702787746, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1255.625, + "completions/mean_terminated_length": 1199.2308349609375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.7311827956989247, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2854634893664887, + "kl": 0.0156097412109375, + "learning_rate": 2.8432220060194696e-07, + "loss": -0.0092, + "num_tokens": 132364079.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7864985466003418, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04986364094385054, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10810034460885184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0718795288428261, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1094.0, + "completions/max_terminated_length": 1094.0, + "completions/mean_length": 937.625, + "completions/mean_terminated_length": 937.625, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.7314328582145536, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.273067019261239, + "kl": 0.009891510009765625, + "learning_rate": 2.840052607201573e-07, + "loss": -0.0015, + "num_tokens": 132400921.0, + "reward": 0.0, + "reward_std": 1.0528371334075928, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1306270236580186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10987548913146644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.053748384988657034, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1087.9375, + "completions/mean_terminated_length": 1087.9375, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.7316829207301826, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.061578234250425, + "kl": 0.0162506103515625, + "learning_rate": 2.836885235175166e-07, + "loss": 0.0153, + "num_tokens": 132434512.0, + "reward": 0.0, + "reward_std": 0.5636850595474243, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0003484883671999574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18230969445655967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1054.0, + "completions/max_terminated_length": 1054.0, + "completions/mean_length": 850.3125, + "completions/mean_terminated_length": 850.3125, + "completions/min_length": 630.0, + "completions/min_terminated_length": 630.0, + "epoch": 0.7319329832458115, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.588899841284827, + "kl": 0.022308349609375, + "learning_rate": 2.8337198923536805e-07, + "loss": -0.0371, + "num_tokens": 132486029.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.036867618560791, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06804908580660861, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1857792725894202, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1178.125, + "completions/mean_terminated_length": 1178.125, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.7321830457614403, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2384728545430472, + "kl": 0.01568603515625, + "learning_rate": 2.830556581149002e-07, + "loss": -0.012, + "num_tokens": 132536383.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9911494255065918, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016197071944828438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07790183008010584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1296.3125, + "completions/mean_terminated_length": 1282.7333984375, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.7324331082770693, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.236666086073392, + "kl": 0.017608642578125, + "learning_rate": 2.8273953039714695e-07, + "loss": -0.0073, + "num_tokens": 132590884.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8156754970550537, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.048171422969999544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06305640163672852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1478.875, + "completions/mean_terminated_length": 1331.0, + "completions/min_length": 1202.0, + "completions/min_terminated_length": 1202.0, + "epoch": 0.7326831707926982, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.299237671722306, + "kl": 0.0136871337890625, + "learning_rate": 2.824236063229868e-07, + "loss": -0.0132, + "num_tokens": 132662546.0, + "reward": 0.0, + "reward_std": 0.9933339953422546, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1163415955041525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20336118010670715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1013.0625, + "completions/mean_terminated_length": 943.5000610351562, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.732933233308327, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6476087160663733, + "kl": 0.018768310546875, + "learning_rate": 2.82107886133144e-07, + "loss": 0.0346, + "num_tokens": 132695579.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0116575956344604, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06561205082769965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04457877660522309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043481, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1258.5, + "completions/mean_terminated_length": 1148.727294921875, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.733183295823956, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.954407880813376, + "kl": 0.018890380859375, + "learning_rate": 2.817923700681863e-07, + "loss": -0.0003, + "num_tokens": 132762483.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.048785924911499, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11329054685596346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21092764231921304, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1343.875, + "completions/mean_terminated_length": 1250.2000732421875, + "completions/min_length": 1082.0, + "completions/min_terminated_length": 1082.0, + "epoch": 0.7334333583395849, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.98106089040106, + "kl": 0.0187225341796875, + "learning_rate": 2.8147705836852643e-07, + "loss": -0.0307, + "num_tokens": 132812121.0, + "reward": 0.0, + "reward_std": 0.6927893161773682, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015867622935530623, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10458667894688628, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1178.75, + "completions/mean_terminated_length": 1157.3333740234375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.7336834208552138, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2916913925940063, + "kl": 0.019012451171875, + "learning_rate": 2.811619512744219e-07, + "loss": -0.0227, + "num_tokens": 132856413.0, + "reward": 0.0, + "reward_std": 0.8620816469192505, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02197228064098719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04514389384785305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852977, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1378.875, + "completions/mean_terminated_length": 1323.8182373046875, + "completions/min_length": 1030.0, + "completions/min_terminated_length": 1030.0, + "epoch": 0.7339334833708427, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3337807511964828, + "kl": 0.0151519775390625, + "learning_rate": 2.808470490259735e-07, + "loss": 0.0083, + "num_tokens": 132895827.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8428303003311157, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06149878833520425, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04537742041316154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1355.1875, + "completions/mean_terminated_length": 1321.769287109375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.7341835458864716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.230911414234537, + "kl": 0.018463134765625, + "learning_rate": 2.805323518631263e-07, + "loss": -0.007, + "num_tokens": 132956094.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0307257175445557, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06432573734854433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13612597892219633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1434.3125, + "completions/mean_terminated_length": 1237.25, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.7344336084021005, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4673966645161163, + "kl": 0.0112457275390625, + "learning_rate": 2.8021786002566914e-07, + "loss": 0.0218, + "num_tokens": 133018771.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0439451932907104, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09468386939389592, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2640169079665964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1419.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1115.875, + "completions/mean_terminated_length": 1115.875, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.7346836709177295, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7065999698999375, + "kl": 0.011474609375, + "learning_rate": 2.799035737532344e-07, + "loss": -0.0334, + "num_tokens": 133071297.0, + "reward": 0.0, + "reward_std": 0.8181681632995605, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09031301256405144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07354496521686024, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1063.75, + "completions/mean_terminated_length": 918.3333740234375, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.7349337334333583, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.002767744244612, + "kl": 0.0165863037109375, + "learning_rate": 2.7958949328529763e-07, + "loss": -0.0257, + "num_tokens": 133114077.0, + "reward": 0.0, + "reward_std": 0.3997049331665039, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041522097318851335, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05513584671095679, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1218.375, + "completions/mean_terminated_length": 1178.1429443359375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.7351837959489872, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.316724301428203, + "kl": 0.0169525146484375, + "learning_rate": 2.7927561886117764e-07, + "loss": 0.0358, + "num_tokens": 133166803.0, + "reward": 0.0, + "reward_std": 0.7874860763549805, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0795508203062593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09028866196854868, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1345.8125, + "completions/mean_terminated_length": 1191.625, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.7354338584646162, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.344402620149762, + "kl": 0.0130157470703125, + "learning_rate": 2.789619507200365e-07, + "loss": -0.0087, + "num_tokens": 133215184.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.061706781387329, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020582410619251347, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12586291279622158, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1228.1875, + "completions/mean_terminated_length": 1189.357177734375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.735683920980245, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.891026592034928, + "kl": 0.020111083984375, + "learning_rate": 2.786484891008789e-07, + "loss": -0.0424, + "num_tokens": 133263235.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0532174110412598, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10142228411019896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08138063948132122, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1240.0, + "completions/max_terminated_length": 1240.0, + "completions/mean_length": 958.3125, + "completions/mean_terminated_length": 958.3125, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.735933983495874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0190759171141828, + "kl": 0.017669677734375, + "learning_rate": 2.7833523424255213e-07, + "loss": 0.0412, + "num_tokens": 133303304.0, + "reward": 0.0, + "reward_std": 0.695584774017334, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1276711243800358, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11372487118760626, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15147423690002354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1274.0, + "completions/max_terminated_length": 1274.0, + "completions/mean_length": 1018.625, + "completions/mean_terminated_length": 1018.625, + "completions/min_length": 663.0, + "completions/min_terminated_length": 663.0, + "epoch": 0.7361840460115029, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.091660769595385, + "kl": 0.020782470703125, + "learning_rate": 2.78022186383746e-07, + "loss": -0.0553, + "num_tokens": 133341026.0, + "reward": 0.0, + "reward_std": 0.8045316934585571, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1198780514262837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08282895942692477, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1101345977866612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1140.625, + "completions/mean_terminated_length": 1057.6923828125, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.7364341085271318, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3725253436467657, + "kl": 0.0201416015625, + "learning_rate": 2.777093457629925e-07, + "loss": -0.0028, + "num_tokens": 133383908.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0099483728408813, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05229107696944159, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06625494734218633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1162.875, + "completions/mean_terminated_length": 1162.875, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.7366841710427607, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2758182745984725, + "kl": 0.020843505859375, + "learning_rate": 2.7739671261866584e-07, + "loss": -0.011, + "num_tokens": 133435970.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9649913907051086, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02169901370085858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1081041932060122, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0769800358919501, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1280.0625, + "completions/mean_terminated_length": 1229.3077392578125, + "completions/min_length": 1029.0, + "completions/min_terminated_length": 1029.0, + "epoch": 0.7369342335583896, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.59048517946447, + "kl": 0.0205078125, + "learning_rate": 2.770842871889818e-07, + "loss": 0.0371, + "num_tokens": 133482939.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9774616956710815, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06477350999299933, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.140372126989861, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1107.5, + "completions/mean_terminated_length": 1107.5, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.7371842960740185, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.521965725586249, + "kl": 0.0139617919921875, + "learning_rate": 2.7677206971199874e-07, + "loss": 0.0615, + "num_tokens": 133529347.0, + "reward": 0.0, + "reward_std": 0.8475252389907837, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.335897964210439, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.29470606885736284, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04127594582445936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1419.0, + "completions/mean_length": 1313.0, + "completions/mean_terminated_length": 1072.571533203125, + "completions/min_length": 842.0, + "completions/min_terminated_length": 842.0, + "epoch": 0.7374343585896475, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.666694570734328, + "kl": 0.014404296875, + "learning_rate": 2.7646006042561577e-07, + "loss": -0.0387, + "num_tokens": 133575667.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0332765579223633, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01131092081672836, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0695994814639629, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1282.9375, + "completions/mean_terminated_length": 1184.272705078125, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.7376844211052763, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.016563364940818, + "kl": 0.0150299072265625, + "learning_rate": 2.7614825956757336e-07, + "loss": 0.0306, + "num_tokens": 133630370.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8123472929000854, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04330706392077192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10748334183996644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1454.6875, + "completions/mean_terminated_length": 1355.0, + "completions/min_length": 1273.0, + "completions/min_terminated_length": 1273.0, + "epoch": 0.7379344836209052, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5716587406421003, + "kl": 0.0160675048828125, + "learning_rate": 2.7583666737545344e-07, + "loss": 0.0036, + "num_tokens": 133687629.0, + "reward": 0.0, + "reward_std": 0.8285629153251648, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.33708814909170026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.30906525591561357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1115.875, + "completions/mean_terminated_length": 1115.875, + "completions/min_length": 560.0, + "completions/min_terminated_length": 560.0, + "epoch": 0.7381845461365342, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9877604000679945, + "kl": 0.047943115234375, + "learning_rate": 2.755252840866792e-07, + "loss": 0.0185, + "num_tokens": 133742315.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9040992259979248, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010122605358361186, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09683894795131764, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1408308678285174, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1441.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1121.25, + "completions/mean_terminated_length": 1121.25, + "completions/min_length": 573.0, + "completions/min_terminated_length": 573.0, + "epoch": 0.738434608652163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6703687072712587, + "kl": 0.0167083740234375, + "learning_rate": 2.7521410993851406e-07, + "loss": 0.0076, + "num_tokens": 133794071.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7388441562652588, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08976858557976229, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08271695727732009, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.077817450199525, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1203.3125, + "completions/mean_terminated_length": 1183.533447265625, + "completions/min_length": 948.0, + "completions/min_terminated_length": 948.0, + "epoch": 0.7386846711677919, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.658680784151936, + "kl": 0.015380859375, + "learning_rate": 2.749031451680626e-07, + "loss": 0.0506, + "num_tokens": 133836756.0, + "reward": 0.0, + "reward_std": 0.8522695302963257, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06431433513665145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10256164782310487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 1099.3125, + "completions/mean_terminated_length": 1099.3125, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.7389347336834209, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2408402938232146, + "kl": 0.017547607421875, + "learning_rate": 2.745923900122694e-07, + "loss": 0.0033, + "num_tokens": 133877465.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0663031339645386, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0191480644741296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05436366481037519, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 968.6875, + "completions/mean_terminated_length": 968.6875, + "completions/min_length": 622.0, + "completions/min_terminated_length": 622.0, + "epoch": 0.7391847961990498, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5125393989741087, + "kl": 0.019073486328125, + "learning_rate": 2.7428184470791976e-07, + "loss": 0.008, + "num_tokens": 133910308.0, + "reward": 0.0, + "reward_std": 0.8416318297386169, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010883063669715398, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23392231535314656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1486.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1011.6875, + "completions/mean_terminated_length": 1011.6875, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.7394348587146786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.616567462004698, + "kl": 0.019134521484375, + "learning_rate": 2.739715094916388e-07, + "loss": -0.0193, + "num_tokens": 133936751.0, + "reward": 0.0, + "reward_std": 0.933053195476532, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05398164108279602, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0628807112389912, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04791968589521741, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1092.5, + "completions/mean_terminated_length": 1092.5, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.7396849212303076, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.832136411782129, + "kl": 0.022491455078125, + "learning_rate": 2.736613845998915e-07, + "loss": -0.0268, + "num_tokens": 133990111.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7446444034576416, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040355595340688014, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06312833123246266, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1010.8125, + "completions/mean_terminated_length": 978.2000732421875, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.7399349837459365, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.495434997859454, + "kl": 0.01361083984375, + "learning_rate": 2.733514702689831e-07, + "loss": -0.0354, + "num_tokens": 134023572.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0221027135849, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005786485056014199, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06555757871863829, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 945.25, + "completions/mean_terminated_length": 945.25, + "completions/min_length": 486.0, + "completions/min_terminated_length": 486.0, + "epoch": 0.7401850462615653, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8795730876591363, + "kl": 0.0143280029296875, + "learning_rate": 2.7304176673505764e-07, + "loss": 0.0107, + "num_tokens": 134057216.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9294309616088867, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06919686173465903, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02741563870495129, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1233.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 993.0, + "completions/mean_terminated_length": 993.0, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.7404351087771943, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4536640515493033, + "kl": 0.0200042724609375, + "learning_rate": 2.727322742340992e-07, + "loss": 0.0709, + "num_tokens": 134104696.0, + "reward": 0.0, + "reward_std": 0.750898003578186, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011789120588892296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0705015193078583, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1342.6875, + "completions/mean_terminated_length": 1248.300048828125, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.7406851712928232, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7795223681825583, + "kl": 0.0131072998046875, + "learning_rate": 2.7242299300193045e-07, + "loss": -0.0426, + "num_tokens": 134149667.0, + "reward": -1.862645149230957e-08, + "reward_std": 0.9731484651565552, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053722758317135993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16260079555720677, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752093, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 1261.8125, + "completions/mean_terminated_length": 1206.84619140625, + "completions/min_length": 1108.0, + "completions/min_terminated_length": 1108.0, + "epoch": 0.7409352338084522, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.186304267660854, + "kl": 0.0137939453125, + "learning_rate": 2.721139232742137e-07, + "loss": -0.0108, + "num_tokens": 134185920.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8960762619972229, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05817475014632105, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052185297752468114, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1219.8125, + "completions/mean_terminated_length": 1155.1539306640625, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.741185296324081, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0736120654126906, + "kl": 0.017608642578125, + "learning_rate": 2.7180506528644964e-07, + "loss": -0.1068, + "num_tokens": 134235573.0, + "reward": 0.0, + "reward_std": 0.46755126118659973, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1707951401122709, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22714726857936296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1204.375, + "completions/mean_terminated_length": 1184.666748046875, + "completions/min_length": 985.0, + "completions/min_terminated_length": 985.0, + "epoch": 0.7414353588397099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.915335199947237, + "kl": 0.0124053955078125, + "learning_rate": 2.714964192739777e-07, + "loss": -0.072, + "num_tokens": 134279939.0, + "reward": 0.0, + "reward_std": 0.8012506365776062, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.150594845192555, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18073302788780876, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1333.0, + "completions/mean_length": 1266.1875, + "completions/mean_terminated_length": 1159.9091796875, + "completions/min_length": 1031.0, + "completions/min_terminated_length": 1031.0, + "epoch": 0.7416854213553389, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.056589010966818, + "kl": 0.01922607421875, + "learning_rate": 2.711879854719763e-07, + "loss": 0.015, + "num_tokens": 134322622.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9224755764007568, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0835592047862225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09034343663117177, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1076.0, + "completions/mean_terminated_length": 1047.7333984375, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.7419354838709677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.279921817054807, + "kl": 0.0164794921875, + "learning_rate": 2.7087976411546154e-07, + "loss": 0.0365, + "num_tokens": 134360774.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0664619207382202, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08427420228985288, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06021130655048928, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1247.375, + "completions/mean_terminated_length": 1211.2857666015625, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.7421855463865966, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8041564669005288, + "kl": 0.0145111083984375, + "learning_rate": 2.7057175543928747e-07, + "loss": 0.0075, + "num_tokens": 134403380.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9319431185722351, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007648559986043161, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09704849919338498, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15098442401882486, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1416.0, + "completions/mean_terminated_length": 1350.6666259765625, + "completions/min_length": 1153.0, + "completions/min_terminated_length": 1153.0, + "epoch": 0.7424356089022256, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9430367329474025, + "kl": 0.0174102783203125, + "learning_rate": 2.702639596781469e-07, + "loss": 0.0043, + "num_tokens": 134445484.0, + "reward": 0.0, + "reward_std": 0.7475389242172241, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039533163642133044, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061440036220211214, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1130.3125, + "completions/mean_terminated_length": 1130.3125, + "completions/min_length": 759.0, + "completions/min_terminated_length": 759.0, + "epoch": 0.7426856714178545, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.336837114057825, + "kl": 0.016937255859375, + "learning_rate": 2.699563770665699e-07, + "loss": -0.0068, + "num_tokens": 134492161.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.905495285987854, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015779017958611107, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08523698852620017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1266.0, + "completions/max_terminated_length": 1266.0, + "completions/mean_length": 994.625, + "completions/mean_terminated_length": 994.625, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.7429357339334833, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.0647737597544795, + "kl": 0.029296875, + "learning_rate": 2.6964900783892407e-07, + "loss": -0.032, + "num_tokens": 134534011.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.059629201889038, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004463795249123024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04157733282524031, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1404.75, + "completions/mean_terminated_length": 1309.5, + "completions/min_length": 1118.0, + "completions/min_terminated_length": 1118.0, + "epoch": 0.7431857964491123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.071713097256719, + "kl": 0.017425537109375, + "learning_rate": 2.6934185222941474e-07, + "loss": -0.0055, + "num_tokens": 134580263.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9629567265510559, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12697185339005676, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0920287687981992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1479.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1208.1875, + "completions/mean_terminated_length": 1208.1875, + "completions/min_length": 1051.0, + "completions/min_terminated_length": 1051.0, + "epoch": 0.7434358589647412, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1781735082361258, + "kl": 0.0131072998046875, + "learning_rate": 2.6903491047208406e-07, + "loss": -0.014, + "num_tokens": 134626762.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7199552059173584, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17377396321177824, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23209021128495397, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1129.5625, + "completions/mean_terminated_length": 1129.5625, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.74368592148037, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.138610363257044, + "kl": 0.019775390625, + "learning_rate": 2.687281828008115e-07, + "loss": 0.0126, + "num_tokens": 134668147.0, + "reward": 0.0, + "reward_std": 1.0404014587402344, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04427869439030627, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09853996438815668, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1159.0, + "completions/max_terminated_length": 1159.0, + "completions/mean_length": 897.625, + "completions/mean_terminated_length": 897.625, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.743935983995999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6858374662228814, + "kl": 0.020660400390625, + "learning_rate": 2.684216694493132e-07, + "loss": -0.0237, + "num_tokens": 134700877.0, + "reward": 0.0, + "reward_std": 0.8383627533912659, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017912452502992414, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04564901578665171, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1228.5625, + "completions/mean_terminated_length": 1189.7857666015625, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.7441860465116279, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8540746977000375, + "kl": 0.014312744140625, + "learning_rate": 2.6811537065114253e-07, + "loss": 0.0053, + "num_tokens": 134752118.0, + "reward": 0.0, + "reward_std": 0.6271020770072937, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15722263446622284, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2840235779614398, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1047.4375, + "completions/mean_terminated_length": 982.7857666015625, + "completions/min_length": 621.0, + "completions/min_terminated_length": 621.0, + "epoch": 0.7444361090272568, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.324589274165111, + "kl": 0.0211181640625, + "learning_rate": 2.6780928663968883e-07, + "loss": -0.0428, + "num_tokens": 134792493.0, + "reward": 0.0, + "reward_std": 0.7578592300415039, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06229028865359832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15727992137542648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12995725793078622, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1070.25, + "completions/mean_terminated_length": 1041.60009765625, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.7446861715428857, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.965690112803656, + "kl": 0.016265869140625, + "learning_rate": 2.6750341764817787e-07, + "loss": -0.0313, + "num_tokens": 134834369.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.030699372291565, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03762903736649671, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09669543168399042, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1283.1875, + "completions/mean_terminated_length": 1233.1539306640625, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.7449362340585146, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7683648797467186, + "kl": 0.0201416015625, + "learning_rate": 2.6719776390967156e-07, + "loss": -0.0293, + "num_tokens": 134881348.0, + "reward": 0.0, + "reward_std": 0.6099780201911926, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006998251680273352, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16532399328520778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1465024333004847, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1080.6875, + "completions/mean_terminated_length": 1020.7857666015625, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.7451862965741435, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5411352068132764, + "kl": 0.01184844970703125, + "learning_rate": 2.668923256570681e-07, + "loss": -0.0299, + "num_tokens": 134936679.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8279332518577576, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2516732158603074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15216894403622866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19846634195472262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1231.0625, + "completions/mean_terminated_length": 1231.0625, + "completions/min_length": 1104.0, + "completions/min_terminated_length": 1104.0, + "epoch": 0.7454363590897725, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6729268082783504, + "kl": 0.01458740234375, + "learning_rate": 2.6658710312310124e-07, + "loss": -0.0282, + "num_tokens": 134983824.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0611343383789062, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02743398916051563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04829548557712461, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 2981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1137.9375, + "completions/mean_terminated_length": 1113.800048828125, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.7456864216054013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2101298385971466, + "kl": 0.018524169921875, + "learning_rate": 2.6628209654034016e-07, + "loss": -0.0773, + "num_tokens": 135034303.0, + "reward": 0.0, + "reward_std": 0.798080563545227, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04027491574815313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09917040242044466, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1395.1875, + "completions/mean_terminated_length": 1332.300048828125, + "completions/min_length": 1204.0, + "completions/min_terminated_length": 1204.0, + "epoch": 0.7459364841210303, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4154454717210623, + "kl": 0.020050048828125, + "learning_rate": 2.659773061411901e-07, + "loss": -0.0044, + "num_tokens": 135098450.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9795109033584595, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030104448827967593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07842619746898306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1427.375, + "completions/mean_terminated_length": 1370.888916015625, + "completions/min_length": 1286.0, + "completions/min_terminated_length": 1286.0, + "epoch": 0.7461865466366592, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.545420468354446, + "kl": 0.0151519775390625, + "learning_rate": 2.6567273215789105e-07, + "loss": 0.0061, + "num_tokens": 135152504.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9210095405578613, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09501640125114996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07389690628076173, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1111.4375, + "completions/mean_terminated_length": 1085.533447265625, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.746436609152288, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.826826295925997, + "kl": 0.016357421875, + "learning_rate": 2.6536837482251836e-07, + "loss": -0.0158, + "num_tokens": 135193095.0, + "reward": 0.0, + "reward_std": 0.9485629796981812, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052059690178245945, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11798602534552115, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.052880017930181315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1167.625, + "completions/mean_terminated_length": 1120.1429443359375, + "completions/min_length": 684.0, + "completions/min_terminated_length": 684.0, + "epoch": 0.746686671667917, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.156881686863087, + "kl": 0.01568603515625, + "learning_rate": 2.6506423436698176e-07, + "loss": -0.0191, + "num_tokens": 135238193.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0577075481414795, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07531583692058107, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12378536953299515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1060.5, + "completions/mean_terminated_length": 1060.5, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.7469367341835459, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3009724732475387, + "kl": 0.017913818359375, + "learning_rate": 2.6476031102302676e-07, + "loss": 0.003, + "num_tokens": 135285561.0, + "reward": 0.0, + "reward_std": 0.6821597814559937, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0360001942117978, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06339483252680939, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1243.9375, + "completions/mean_terminated_length": 1158.5833740234375, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.7471867966991748, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7970597439365963, + "kl": 0.0150604248046875, + "learning_rate": 2.6445660502223254e-07, + "loss": -0.0111, + "num_tokens": 135333176.0, + "reward": 0.0, + "reward_std": 1.033457636833191, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08157008125140035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10516623844045977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1474.5625, + "completions/mean_terminated_length": 1364.3333740234375, + "completions/min_length": 1198.0, + "completions/min_terminated_length": 1198.0, + "epoch": 0.7474368592148037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4458076541341627, + "kl": 0.01104736328125, + "learning_rate": 2.641531165960129e-07, + "loss": -0.0139, + "num_tokens": 135402409.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.058016300201416, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0074410150091411085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31761586247608764, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1058.0625, + "completions/mean_terminated_length": 1058.0625, + "completions/min_length": 854.0, + "completions/min_terminated_length": 854.0, + "epoch": 0.7476869217304326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1058705492283316, + "kl": 0.01551055908203125, + "learning_rate": 2.638498459756164e-07, + "loss": -0.0314, + "num_tokens": 135442730.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9046301245689392, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09246278977848549, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20548555409353456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059472994182545084, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 1261.125, + "completions/mean_terminated_length": 1117.800048828125, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.7479369842460615, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8994752120163545, + "kl": 0.0152435302734375, + "learning_rate": 2.635467933921246e-07, + "loss": 0.0407, + "num_tokens": 135494052.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.3616088628768921, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011857723356434289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1902036322687001, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1849424334859464, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 986.0, + "completions/max_terminated_length": 986.0, + "completions/mean_length": 753.8125, + "completions/mean_terminated_length": 753.8125, + "completions/min_length": 498.0, + "completions/min_terminated_length": 498.0, + "epoch": 0.7481870467616905, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.321681369926891, + "kl": 0.020721435546875, + "learning_rate": 2.6324395907645376e-07, + "loss": 0.0287, + "num_tokens": 135521585.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7705485820770264, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016896778374771413, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05747972869990197, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16777409856157224, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1204.8125, + "completions/mean_terminated_length": 1185.1334228515625, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.7484371092773193, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.116446213259558, + "kl": 0.0206298828125, + "learning_rate": 2.629413432593533e-07, + "loss": 0.0277, + "num_tokens": 135572758.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9279994964599609, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00509458200665373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09892308161783563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1295.0, + "completions/max_terminated_length": 1295.0, + "completions/mean_length": 1097.9375, + "completions/mean_terminated_length": 1097.9375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.7486871717929482, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.374228814426986, + "kl": 0.017425537109375, + "learning_rate": 2.6263894617140693e-07, + "loss": -0.0178, + "num_tokens": 135611869.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0423120260238647, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21598989049788067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31712128356941854, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1200.625, + "completions/mean_terminated_length": 967.7777709960938, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.7489372343085772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1229668102258565, + "kl": 0.01715087890625, + "learning_rate": 2.6233676804303074e-07, + "loss": 0.0416, + "num_tokens": 135672511.0, + "reward": 0.0, + "reward_std": 0.6196449995040894, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0680904577669541, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10086716280170575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15341785110291775, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1391.1875, + "completions/mean_terminated_length": 1306.5555419921875, + "completions/min_length": 1085.0, + "completions/min_terminated_length": 1085.0, + "epoch": 0.749187296824206, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4052341131907617, + "kl": 0.013885498046875, + "learning_rate": 2.6203480910447463e-07, + "loss": 0.0076, + "num_tokens": 135734226.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6746236085891724, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09702029447091029, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0712413053239779, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 998.5625, + "completions/mean_terminated_length": 998.5625, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.7494373593398349, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.877493656182466, + "kl": 0.020965576171875, + "learning_rate": 2.617330695858212e-07, + "loss": -0.0495, + "num_tokens": 135776763.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.023902177810669, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.103158903554913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08421603206166171, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 2997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1248.6875, + "completions/mean_terminated_length": 1231.933349609375, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.7496874218554639, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6572583425957106, + "kl": 0.0113525390625, + "learning_rate": 2.6143154971698595e-07, + "loss": -0.0381, + "num_tokens": 135824246.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6093450784683228, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008879598420732167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06053615920113506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1236.8125, + "completions/mean_terminated_length": 1199.21435546875, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.7499374843710928, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7865777650368835, + "kl": 0.0160064697265625, + "learning_rate": 2.6113024972771704e-07, + "loss": 0.0058, + "num_tokens": 135871915.0, + "reward": 0.0, + "reward_std": 0.9637945890426636, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01487284523558191, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12007179139577791, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 2999 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 830.0, + "completions/mean_length": 1155.4375, + "completions/mean_terminated_length": 810.875, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.7501875468867217, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.429932754424475, + "kl": 0.0131378173828125, + "learning_rate": 2.6082916984759473e-07, + "loss": -0.0375, + "num_tokens": 135916858.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9752194285392761, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22641615900208603, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.269960721441875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3000 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1318.625, + "completions/mean_terminated_length": 1276.769287109375, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.7504376094023506, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9909950397684546, + "kl": 0.0156707763671875, + "learning_rate": 2.605283103060324e-07, + "loss": 0.0241, + "num_tokens": 135967836.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9183400869369507, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007518190065756357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06465888759949254, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116198, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3001 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1230.0, + "completions/max_terminated_length": 1230.0, + "completions/mean_length": 1113.3125, + "completions/mean_terminated_length": 1113.3125, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.7506876719179795, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6139799657497345, + "kl": 0.0177001953125, + "learning_rate": 2.6022767133227484e-07, + "loss": -0.0062, + "num_tokens": 136001137.0, + "reward": 0.0, + "reward_std": 0.9913209676742554, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0061997744057836306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0868654386651096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3002 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1186.1875, + "completions/mean_terminated_length": 1165.2667236328125, + "completions/min_length": 1017.0, + "completions/min_terminated_length": 1017.0, + "epoch": 0.7509377344336085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.688795959681133, + "kl": 0.0094757080078125, + "learning_rate": 2.5992725315539874e-07, + "loss": -0.0226, + "num_tokens": 136043980.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9380015730857849, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041301315312272525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06681671169047927, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3003 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1293.8125, + "completions/mean_terminated_length": 1133.4444580078125, + "completions/min_length": 956.0, + "completions/min_terminated_length": 956.0, + "epoch": 0.7511877969492373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9375185782010935, + "kl": 0.015716552734375, + "learning_rate": 2.596270560043128e-07, + "loss": -0.027, + "num_tokens": 136091753.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0483837127685547, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09586389485939438, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1056349036254885, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09108400680852978, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3004 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1401.9375, + "completions/mean_terminated_length": 1107.75, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.7514378594648662, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4973353309167416, + "kl": 0.016754150390625, + "learning_rate": 2.5932708010775744e-07, + "loss": -0.0066, + "num_tokens": 136151976.0, + "reward": 0.0, + "reward_std": 0.9117569327354431, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0035069271717622333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.035017830960205054, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572016, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3005 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1061.0, + "completions/mean_terminated_length": 998.2857666015625, + "completions/min_length": 752.0, + "completions/min_terminated_length": 752.0, + "epoch": 0.7516879219804952, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.257357660838128, + "kl": 0.012969970703125, + "learning_rate": 2.590273256943043e-07, + "loss": 0.0092, + "num_tokens": 136189584.0, + "reward": -3.3527612686157227e-08, + "reward_std": 1.0593757629394531, + "rewards/wordcountpos_reward_GEOBench/mean": -3.3527612686157227e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01869775787243833, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09580598471344963, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3006 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1038.875, + "completions/mean_terminated_length": 1038.875, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.751937984496124, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8388883260914204, + "kl": 0.015899658203125, + "learning_rate": 2.5872779299235603e-07, + "loss": -0.0336, + "num_tokens": 136228294.0, + "reward": 0.0, + "reward_std": 0.911024808883667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019156173161377296, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09359265296723568, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3007 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1351.4375, + "completions/mean_terminated_length": 1262.300048828125, + "completions/min_length": 1138.0, + "completions/min_terminated_length": 1138.0, + "epoch": 0.7521880470117529, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6097606860057576, + "kl": 0.0184783935546875, + "learning_rate": 2.584284822301472e-07, + "loss": -0.0038, + "num_tokens": 136267621.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5926787257194519, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.038840381861848405, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11234887242707581, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16771890063326086, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3008 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1307.75, + "completions/mean_terminated_length": 1243.666748046875, + "completions/min_length": 1140.0, + "completions/min_terminated_length": 1140.0, + "epoch": 0.7524381095273819, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4926646167680833, + "kl": 0.012969970703125, + "learning_rate": 2.58129393635742e-07, + "loss": -0.0098, + "num_tokens": 136317321.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9873582720756531, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002379586818629254, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.35798245535462603, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639736, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3009 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1404.0, + "completions/mean_terminated_length": 1244.0, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.7526881720430108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.986410991352138, + "kl": 0.01922607421875, + "learning_rate": 2.5783052743703626e-07, + "loss": 0.0033, + "num_tokens": 136373897.0, + "reward": 0.0, + "reward_std": 0.7147550582885742, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2432753488920521, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1201760218915713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19474579822405907, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3010 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 990.875, + "completions/mean_terminated_length": 990.875, + "completions/min_length": 624.0, + "completions/min_terminated_length": 624.0, + "epoch": 0.7529382345586396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.443752191908693, + "kl": 0.015899658203125, + "learning_rate": 2.5753188386175597e-07, + "loss": -0.0291, + "num_tokens": 136423391.0, + "reward": 0.0, + "reward_std": 0.7084863781929016, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10199228142516797, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11847023345025534, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3011 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1127.1875, + "completions/mean_terminated_length": 1041.1539306640625, + "completions/min_length": 646.0, + "completions/min_terminated_length": 646.0, + "epoch": 0.7531882970742686, + "frac_reward_zero_std": 0.0, + "grad_norm": 60.46907214743566, + "kl": 0.1298828125, + "learning_rate": 2.572334631374579e-07, + "loss": -0.0128, + "num_tokens": 136480114.0, + "reward": 0.0, + "reward_std": 0.9870802760124207, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04634466105936363, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030722776310059843, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3012 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1187.0, + "completions/mean_length": 1223.5625, + "completions/mean_terminated_length": 947.125, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.7534383595898975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.106120406109762, + "kl": 0.0185546875, + "learning_rate": 2.569352654915284e-07, + "loss": 0.0157, + "num_tokens": 136536875.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0252180099487305, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013542645869468743, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1269109452991844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3013 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1176.125, + "completions/mean_terminated_length": 1176.125, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.7536884221055263, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.988288551560161, + "kl": 0.020111083984375, + "learning_rate": 2.566372911511844e-07, + "loss": -0.0026, + "num_tokens": 136594573.0, + "reward": 0.0, + "reward_std": 0.42261016368865967, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3925353416486472, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.623613739835308, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3014 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1100.375, + "completions/mean_terminated_length": 1100.375, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.7539384846211553, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.540022143603176, + "kl": 0.020355224609375, + "learning_rate": 2.563395403434723e-07, + "loss": -0.0133, + "num_tokens": 136638411.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7821365594863892, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0502866698285705, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05303325516417991, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17126976771553507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3015 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1159.0, + "completions/max_terminated_length": 1159.0, + "completions/mean_length": 920.125, + "completions/mean_terminated_length": 920.125, + "completions/min_length": 675.0, + "completions/min_terminated_length": 675.0, + "epoch": 0.7541885471367842, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.75759795255801, + "kl": 0.0142059326171875, + "learning_rate": 2.560420132952684e-07, + "loss": -0.0027, + "num_tokens": 136676397.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0016947984695435, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.060880284052535115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10497314488313363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282608, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3016 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1199.0, + "completions/max_terminated_length": 1199.0, + "completions/mean_length": 1060.1875, + "completions/mean_terminated_length": 1060.1875, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.754438609652413, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6088608536940847, + "kl": 0.013427734375, + "learning_rate": 2.557447102332783e-07, + "loss": -0.04, + "num_tokens": 136710816.0, + "reward": 0.0, + "reward_std": 0.8187819719314575, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.24141174903476084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23193960932050767, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1180081604209045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3017 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1409.8125, + "completions/mean_terminated_length": 1293.857177734375, + "completions/min_length": 1161.0, + "completions/min_terminated_length": 1161.0, + "epoch": 0.754688672168042, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3524134633293916, + "kl": 0.0120849609375, + "learning_rate": 2.55447631384037e-07, + "loss": 0.0302, + "num_tokens": 136771349.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5320738554000854, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00500540496106568, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09296991219580306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3018 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1008.4375, + "completions/mean_terminated_length": 1008.4375, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.7549387346836709, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.830319260870358, + "kl": 0.024200439453125, + "learning_rate": 2.551507769739091e-07, + "loss": -0.1035, + "num_tokens": 136817076.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0344938039779663, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18258447011734033, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12672957955917485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3019 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1298.3125, + "completions/mean_terminated_length": 1284.86669921875, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.7551887971992999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4327329125255486, + "kl": 0.020050048828125, + "learning_rate": 2.548541472290876e-07, + "loss": -0.0002, + "num_tokens": 136861905.0, + "reward": 0.0, + "reward_std": 0.8542677760124207, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15438376625137742, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1376907077744484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3020 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1298.4375, + "completions/mean_terminated_length": 1269.6429443359375, + "completions/min_length": 1026.0, + "completions/min_terminated_length": 1026.0, + "epoch": 0.7554388597149287, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2388696910171184, + "kl": 0.0171661376953125, + "learning_rate": 2.5455774237559404e-07, + "loss": -0.0177, + "num_tokens": 136922528.0, + "reward": 0.0, + "reward_std": 1.0202596187591553, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007283676516821032, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0631832656132272, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3021 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1406.5625, + "completions/mean_terminated_length": 1250.8333740234375, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.7556889222305576, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5632758973630367, + "kl": 0.015625, + "learning_rate": 2.5426156263927975e-07, + "loss": -0.0301, + "num_tokens": 136974905.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9397590756416321, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12482260679690621, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13222398234625987, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3022 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1236.1875, + "completions/mean_terminated_length": 1218.60009765625, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.7559389847461866, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9013797034400561, + "kl": 0.00843048095703125, + "learning_rate": 2.539656082458234e-07, + "loss": -0.0329, + "num_tokens": 137021028.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7809981107711792, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020571517245673, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10704841162484655, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3023 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1144.5, + "completions/mean_terminated_length": 1093.71435546875, + "completions/min_length": 601.0, + "completions/min_terminated_length": 601.0, + "epoch": 0.7561890472618155, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8566824579779806, + "kl": 0.016937255859375, + "learning_rate": 2.5366987942073265e-07, + "loss": -0.0685, + "num_tokens": 137062148.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7565597295761108, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013307003753584793, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044146706505381855, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3024 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1362.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1002.0, + "completions/mean_terminated_length": 1002.0, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.7564391097774443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6403446525673067, + "kl": 0.0146942138671875, + "learning_rate": 2.5337437638934287e-07, + "loss": -0.0738, + "num_tokens": 137090908.0, + "reward": 0.0, + "reward_std": 0.7184847593307495, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06428413405352662, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07480144330511493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3025 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1090.3125, + "completions/mean_terminated_length": 1090.3125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.7566891722930733, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8956353696736064, + "kl": 0.023223876953125, + "learning_rate": 2.530790993768177e-07, + "loss": -0.0743, + "num_tokens": 137145129.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0105229616165161, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0317180270446473, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04792565135840934, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3026 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1279.8125, + "completions/mean_terminated_length": 1265.1334228515625, + "completions/min_length": 1121.0, + "completions/min_terminated_length": 1121.0, + "epoch": 0.7569392348087022, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6900491292197506, + "kl": 0.0156707763671875, + "learning_rate": 2.527840486081483e-07, + "loss": -0.0166, + "num_tokens": 137184982.0, + "reward": 0.0, + "reward_std": 0.9678325653076172, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.043071315070274376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10433107528156436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3027 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1300.625, + "completions/mean_terminated_length": 1181.0, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.757189297324331, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.715973121913737, + "kl": 0.0216522216796875, + "learning_rate": 2.524892243081534e-07, + "loss": -0.0096, + "num_tokens": 137232784.0, + "reward": 0.0, + "reward_std": 0.8293786644935608, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016800917309888874, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11623764062599039, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3028 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 839.0, + "completions/mean_terminated_length": 686.4615478515625, + "completions/min_length": 487.0, + "completions/min_terminated_length": 487.0, + "epoch": 0.75743935983996, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.142708877363077, + "kl": 0.018310546875, + "learning_rate": 2.5219462670147964e-07, + "loss": -0.0459, + "num_tokens": 137283504.0, + "reward": 0.0, + "reward_std": 0.7296074628829956, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0031592852998739867, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.01726872668785355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000302, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3029 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1203.8125, + "completions/mean_terminated_length": 1161.5, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.7576894223555889, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.061522778746353, + "kl": 0.02081298828125, + "learning_rate": 2.519002560126004e-07, + "loss": -0.0614, + "num_tokens": 137328085.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9936102032661438, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0218686590345813, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054053809448885055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3030 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1428.75, + "completions/mean_terminated_length": 1215.0, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.7579394848712178, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.650204634822079, + "kl": 0.0135650634765625, + "learning_rate": 2.5160611246581644e-07, + "loss": -0.0085, + "num_tokens": 137388521.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0327956676483154, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03452662061777245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0776069791466704, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792296, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3031 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1326.0, + "completions/mean_length": 1039.875, + "completions/mean_terminated_length": 933.6923217773438, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.7581895473868467, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.766097583813176, + "kl": 0.0185546875, + "learning_rate": 2.5131219628525525e-07, + "loss": -0.017, + "num_tokens": 137427271.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.04239821434021, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00018192932170317598, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.016654721344089983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.2563273370198811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3032 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1133.375, + "completions/mean_terminated_length": 1108.933349609375, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.7584396099024756, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.178419865032869, + "kl": 0.019287109375, + "learning_rate": 2.510185076948714e-07, + "loss": -0.0327, + "num_tokens": 137488637.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5491786003112793, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1018250186602428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11486757927032734, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3033 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1005.0, + "completions/mean_length": 1185.5, + "completions/mean_terminated_length": 871.0, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.7586896724181045, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3643393013108716, + "kl": 0.009063720703125, + "learning_rate": 2.507250469184456e-07, + "loss": -0.0077, + "num_tokens": 137541469.0, + "reward": 0.0, + "reward_std": 0.817253589630127, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3034 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 991.0, + "completions/mean_terminated_length": 991.0, + "completions/min_length": 581.0, + "completions/min_terminated_length": 581.0, + "epoch": 0.7589397349337335, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.652460359008981, + "kl": 0.01824951171875, + "learning_rate": 2.504318141795852e-07, + "loss": -0.0156, + "num_tokens": 137583389.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0657049417495728, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11457208166501973, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0670627592022617, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3035 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 993.5, + "completions/mean_terminated_length": 993.5, + "completions/min_length": 590.0, + "completions/min_terminated_length": 590.0, + "epoch": 0.7591897974493623, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3376385754767552, + "kl": 0.01629638671875, + "learning_rate": 2.501388097017241e-07, + "loss": 0.0162, + "num_tokens": 137624757.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0127235651016235, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027643684637992796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12378821664512031, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13601470508735444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3036 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1135.0, + "completions/mean_terminated_length": 1013.3333740234375, + "completions/min_length": 551.0, + "completions/min_terminated_length": 551.0, + "epoch": 0.7594398599649912, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2435147196735685, + "kl": 0.021209716796875, + "learning_rate": 2.4984603370812165e-07, + "loss": 0.0127, + "num_tokens": 137660397.0, + "reward": 0.0, + "reward_std": 1.0136319398880005, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06700380561337634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08151207494226452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3037 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1381.875, + "completions/mean_terminated_length": 1328.181884765625, + "completions/min_length": 1107.0, + "completions/min_terminated_length": 1107.0, + "epoch": 0.7596899224806202, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.268341944579493, + "kl": 0.0134124755859375, + "learning_rate": 2.4955348642186375e-07, + "loss": -0.0494, + "num_tokens": 137724307.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9060251712799072, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04402394542762166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060884185884630654, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3038 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1345.0, + "completions/max_terminated_length": 1345.0, + "completions/mean_length": 1060.4375, + "completions/mean_terminated_length": 1060.4375, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.759939984996249, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3818387840560735, + "kl": 0.0202789306640625, + "learning_rate": 2.4926116806586126e-07, + "loss": 0.0188, + "num_tokens": 137755490.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0158179998397827, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0028816731567734375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0802557076689075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3039 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1299.0, + "completions/mean_terminated_length": 1232.0, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.760190047511878, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.883928217279752, + "kl": 0.0146484375, + "learning_rate": 2.4896907886285137e-07, + "loss": -0.0304, + "num_tokens": 137807618.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0258026123046875, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.001551298928754722, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06836909249120096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3040 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1038.0625, + "completions/mean_terminated_length": 1038.0625, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.7604401100275069, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.696171425928903, + "kl": 0.02020263671875, + "learning_rate": 2.4867721903539634e-07, + "loss": 0.0198, + "num_tokens": 137847163.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9803884029388428, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.039624023413100035, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03557034299861272, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3041 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1052.6875, + "completions/mean_terminated_length": 1052.6875, + "completions/min_length": 694.0, + "completions/min_terminated_length": 694.0, + "epoch": 0.7606901725431358, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.411959185301536, + "kl": 0.019927978515625, + "learning_rate": 2.4838558880588334e-07, + "loss": 0.0255, + "num_tokens": 137884494.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.041785478591919, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046594125354614824, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05151794406291384, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3042 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1384.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 846.75, + "completions/mean_terminated_length": 846.75, + "completions/min_length": 504.0, + "completions/min_terminated_length": 504.0, + "epoch": 0.7609402350587647, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9764989901979737, + "kl": 0.01258087158203125, + "learning_rate": 2.480941883965256e-07, + "loss": 0.0807, + "num_tokens": 137907010.0, + "reward": 0.0, + "reward_std": 0.7730717658996582, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08227742876957843, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08806013050559923, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06652763279965648, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3043 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1264.625, + "completions/mean_terminated_length": 1210.3077392578125, + "completions/min_length": 1086.0, + "completions/min_terminated_length": 1086.0, + "epoch": 0.7611902975743936, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.528991997733664, + "kl": 0.0107574462890625, + "learning_rate": 2.478030180293599e-07, + "loss": -0.0059, + "num_tokens": 137965884.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9517225027084351, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004361287474155677, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07271760126242739, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3044 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1232.625, + "completions/mean_terminated_length": 1232.625, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.7614403600900225, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2364715616815753, + "kl": 0.019287109375, + "learning_rate": 2.475120779262485e-07, + "loss": -0.0119, + "num_tokens": 138025942.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9342586994171143, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014420908135540391, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08139108764600582, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3045 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1240.9375, + "completions/mean_terminated_length": 1123.181884765625, + "completions/min_length": 715.0, + "completions/min_terminated_length": 715.0, + "epoch": 0.7616904226056515, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.838185360799542, + "kl": 0.01483154296875, + "learning_rate": 2.472213683088781e-07, + "loss": 0.0018, + "num_tokens": 138074813.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.005654215812683, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018835980646058837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0796479981047911, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3046 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1119.4375, + "completions/mean_terminated_length": 1031.615478515625, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.7619404851212803, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.740014767548676, + "kl": 0.0216064453125, + "learning_rate": 2.4693088939876e-07, + "loss": -0.0402, + "num_tokens": 138124540.0, + "reward": 0.0, + "reward_std": 0.8029362559318542, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0497293262294435, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046746726441439596, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3047 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1312.8125, + "completions/mean_terminated_length": 1200.5, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.7621905476369092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8747704249007056, + "kl": 0.0153045654296875, + "learning_rate": 2.466406414172292e-07, + "loss": -0.0168, + "num_tokens": 138182097.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6643801927566528, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.5820872733189631, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.42490952803415255, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3048 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1242.9375, + "completions/mean_terminated_length": 1206.21435546875, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.7624406101525382, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.94151117523381, + "kl": 0.017425537109375, + "learning_rate": 2.463506245854452e-07, + "loss": -0.0136, + "num_tokens": 138222696.0, + "reward": 0.0, + "reward_std": 0.9806994199752808, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13887419466975284, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1943327512481285, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3049 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1060.1875, + "completions/mean_terminated_length": 1060.1875, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.762690672668167, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.383119660834641, + "kl": 0.01849365234375, + "learning_rate": 2.460608391243911e-07, + "loss": -0.01, + "num_tokens": 138260891.0, + "reward": 0.0, + "reward_std": 0.7534471750259399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0205753516982159, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03788814323280853, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3050 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1250.625, + "completions/mean_terminated_length": 1215.0, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.7629407351837959, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1714871646958906, + "kl": 0.012969970703125, + "learning_rate": 2.457712852548738e-07, + "loss": 0.0147, + "num_tokens": 138314821.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0439023971557617, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02284187122036755, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04298711394100683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3051 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1473.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1016.75, + "completions/mean_terminated_length": 1016.75, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.7631907976994249, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3038141377589945, + "kl": 0.0116424560546875, + "learning_rate": 2.454819631975237e-07, + "loss": -0.0098, + "num_tokens": 138353409.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.010291576385498, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05417417543111735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10381507819789955, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3052 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1327.0, + "completions/mean_length": 1073.375, + "completions/mean_terminated_length": 1012.4285888671875, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.7634408602150538, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.383213852682177, + "kl": 0.019500732421875, + "learning_rate": 2.451928731727945e-07, + "loss": -0.087, + "num_tokens": 138399423.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9512152075767517, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13073808038093998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14421595866934842, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3053 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1228.4375, + "completions/mean_terminated_length": 1228.4375, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.7636909227306826, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9385162243971001, + "kl": 0.00841522216796875, + "learning_rate": 2.4490401540096336e-07, + "loss": 0.0081, + "num_tokens": 138447630.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5954143404960632, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013917297490954886, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05687953875164728, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3054 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1047.625, + "completions/mean_terminated_length": 1047.625, + "completions/min_length": 927.0, + "completions/min_terminated_length": 927.0, + "epoch": 0.7639409852463116, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4387032239684316, + "kl": 0.0132293701171875, + "learning_rate": 2.446153901021303e-07, + "loss": -0.0226, + "num_tokens": 138491016.0, + "reward": 0.0, + "reward_std": 0.9929928183555603, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030003817996331346, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07709960807639016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3055 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 990.5625, + "completions/mean_terminated_length": 956.6000366210938, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.7641910477619405, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6394436307014177, + "kl": 0.020477294921875, + "learning_rate": 2.443269974962181e-07, + "loss": -0.0029, + "num_tokens": 138533217.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.959714949131012, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04546618631355635, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11164213726602708, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390615, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3056 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1192.75, + "completions/mean_terminated_length": 1148.857177734375, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.7644411102775694, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.637412112662296, + "kl": 0.0117950439453125, + "learning_rate": 2.4403883780297243e-07, + "loss": -0.001, + "num_tokens": 138592197.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5764794945716858, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0042104340342061595, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.007964412552953083, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3057 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1233.375, + "completions/mean_terminated_length": 1144.5, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.7646911727931983, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8938084731947016, + "kl": 0.0185546875, + "learning_rate": 2.437509112419613e-07, + "loss": -0.0149, + "num_tokens": 138632011.0, + "reward": 0.0, + "reward_std": 0.8008660674095154, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010124082816496216, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04354200945192317, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3058 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1227.0, + "completions/mean_terminated_length": 1014.6666870117188, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.7649412353088272, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.945136270139117, + "kl": 0.017547607421875, + "learning_rate": 2.434632180325752e-07, + "loss": 0.0128, + "num_tokens": 138679083.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9650968313217163, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03991007637430401, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05349199007122785, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3059 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1147.125, + "completions/mean_terminated_length": 1096.71435546875, + "completions/min_length": 564.0, + "completions/min_terminated_length": 564.0, + "epoch": 0.7651912978244562, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.227348916437232, + "kl": 0.018890380859375, + "learning_rate": 2.431757583940267e-07, + "loss": -0.0813, + "num_tokens": 138724989.0, + "reward": 0.0, + "reward_std": 0.6966768503189087, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3405623185818327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2351405675167141, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3060 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1332.0, + "completions/max_terminated_length": 1332.0, + "completions/mean_length": 1084.6875, + "completions/mean_terminated_length": 1084.6875, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.765441360340085, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.414654561466134, + "kl": 0.019439697265625, + "learning_rate": 2.4288853254535093e-07, + "loss": -0.0534, + "num_tokens": 138760416.0, + "reward": 0.0, + "reward_std": 0.9357314705848694, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035734483057339984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04670239667739582, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3061 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1163.5625, + "completions/mean_terminated_length": 1115.5, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.7656914228557139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.570864112408534, + "kl": 0.018829345703125, + "learning_rate": 2.426015407054039e-07, + "loss": 0.0023, + "num_tokens": 138795033.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0415124893188477, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07850505405043202, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1733371387915198, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3062 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1296.9375, + "completions/mean_terminated_length": 1229.25, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.7659414853713429, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.950571875948064, + "kl": 0.014404296875, + "learning_rate": 2.423147830928639e-07, + "loss": 0.0045, + "num_tokens": 138845768.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0618255138397217, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1065214671883072, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059512752391714385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3063 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1301.0, + "completions/mean_length": 1167.8125, + "completions/mean_terminated_length": 1091.1539306640625, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.7661915478869717, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2556577561768427, + "kl": 0.0165863037109375, + "learning_rate": 2.4202825992623105e-07, + "loss": -0.0352, + "num_tokens": 138888669.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.855426013469696, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029076367563017954, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060839740338775046, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3064 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1142.5, + "completions/mean_terminated_length": 1142.5, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.7664416104026006, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2495787642561873, + "kl": 0.020843505859375, + "learning_rate": 2.4174197142382627e-07, + "loss": -0.0103, + "num_tokens": 138933853.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0113160610198975, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0030997467164376466, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09689748687363674, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948504, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3065 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1168.0, + "completions/max_terminated_length": 1168.0, + "completions/mean_length": 1007.9375, + "completions/mean_terminated_length": 1007.9375, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.7666916729182296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.584040191182554, + "kl": 0.024261474609375, + "learning_rate": 2.414559178037919e-07, + "loss": -0.0016, + "num_tokens": 138971724.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9399063587188721, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.14220542401912015, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20040864488983154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978234, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3066 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1127.0, + "completions/mean_terminated_length": 1102.1334228515625, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.7669417354338585, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.347731081551671, + "kl": 0.01898193359375, + "learning_rate": 2.4117009928409133e-07, + "loss": -0.0418, + "num_tokens": 139009356.0, + "reward": 0.0, + "reward_std": 0.5868188142776489, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11032929921492148, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15574185358349196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3067 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1480.375, + "completions/mean_terminated_length": 1343.0, + "completions/min_length": 1238.0, + "completions/min_terminated_length": 1238.0, + "epoch": 0.7671917979494873, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1330945285496883, + "kl": 0.014862060546875, + "learning_rate": 2.408845160825087e-07, + "loss": 0.0032, + "num_tokens": 139078266.0, + "reward": 0.0, + "reward_std": 0.8991299867630005, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034003587694012585, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06995642995749882, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18048289136113171, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3068 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 900.3125, + "completions/mean_terminated_length": 900.3125, + "completions/min_length": 636.0, + "completions/min_terminated_length": 636.0, + "epoch": 0.7674418604651163, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1644519777594757, + "kl": 0.0156097412109375, + "learning_rate": 2.405991684166489e-07, + "loss": -0.0601, + "num_tokens": 139108567.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0596058368682861, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05762028727751973, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0778284200702731, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3069 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1433.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 938.25, + "completions/mean_terminated_length": 938.25, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.7676919229807452, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.668078643625818, + "kl": 0.026214599609375, + "learning_rate": 2.4031405650393734e-07, + "loss": -0.0232, + "num_tokens": 139160363.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9878590106964111, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10722385694009962, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22120895300158475, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3070 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1407.25, + "completions/mean_terminated_length": 1335.111083984375, + "completions/min_length": 1097.0, + "completions/min_terminated_length": 1097.0, + "epoch": 0.767941985496374, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9950539440544315, + "kl": 0.015350341796875, + "learning_rate": 2.4002918056162e-07, + "loss": 0.0378, + "num_tokens": 139220311.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9748401641845703, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13907785958625832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16322283493213346, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3071 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1266.0625, + "completions/mean_terminated_length": 1212.0770263671875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.768192048012003, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.230808419964507, + "kl": 0.0090789794921875, + "learning_rate": 2.397445408067628e-07, + "loss": -0.0369, + "num_tokens": 139260000.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9358878135681152, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010524976708690216, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06364576325634179, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3072 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1058.3125, + "completions/mean_terminated_length": 1028.86669921875, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.7684421105276319, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3531223762592037, + "kl": 0.02728271484375, + "learning_rate": 2.3946013745625183e-07, + "loss": -0.0836, + "num_tokens": 139299269.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9307312965393066, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05025999839548189, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048417508979962574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3073 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1314.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 953.5, + "completions/mean_terminated_length": 953.5, + "completions/min_length": 709.0, + "completions/min_terminated_length": 709.0, + "epoch": 0.7686921730432608, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.071399767183123, + "kl": 0.021026611328125, + "learning_rate": 2.3917597072679264e-07, + "loss": -0.0419, + "num_tokens": 139331861.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7096107006072998, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06025534234522517, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03884101161969444, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16187558093703852, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3074 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1175.75, + "completions/mean_terminated_length": 981.2000122070312, + "completions/min_length": 348.0, + "completions/min_terminated_length": 348.0, + "epoch": 0.7689422355588897, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3671825819102663, + "kl": 0.01727294921875, + "learning_rate": 2.3889204083491125e-07, + "loss": -0.0702, + "num_tokens": 139387065.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8155432939529419, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024772136796286297, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04379203400585994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194866, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3075 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1221.0, + "completions/max_terminated_length": 1221.0, + "completions/mean_length": 918.125, + "completions/mean_terminated_length": 918.125, + "completions/min_length": 659.0, + "completions/min_terminated_length": 659.0, + "epoch": 0.7691922980745186, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8045022933836066, + "kl": 0.02117919921875, + "learning_rate": 2.3860834799695257e-07, + "loss": -0.0549, + "num_tokens": 139429619.0, + "reward": 0.0, + "reward_std": 0.7037010192871094, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11616308187156175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2398199553067558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3076 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1252.0625, + "completions/mean_terminated_length": 1194.84619140625, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.7694423605901476, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1775780256912967, + "kl": 0.014129638671875, + "learning_rate": 2.383248924290811e-07, + "loss": 0.0135, + "num_tokens": 139486620.0, + "reward": 0.0, + "reward_std": 0.918183445930481, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10727644615675828, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07428002358313548, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3077 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1023.875, + "completions/mean_terminated_length": 992.1333618164062, + "completions/min_length": 742.0, + "completions/min_terminated_length": 742.0, + "epoch": 0.7696924231057765, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3938292467807516, + "kl": 0.014312744140625, + "learning_rate": 2.3804167434728077e-07, + "loss": -0.0141, + "num_tokens": 139522930.0, + "reward": 0.0, + "reward_std": 0.9977412223815918, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005565174944875537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02226069977950215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3078 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1166.0, + "completions/max_terminated_length": 1166.0, + "completions/mean_length": 993.0625, + "completions/mean_terminated_length": 993.0625, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.7699424856214053, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6705724530641857, + "kl": 0.01715087890625, + "learning_rate": 2.3775869396735438e-07, + "loss": -0.0351, + "num_tokens": 139564907.0, + "reward": 0.0, + "reward_std": 1.044958472251892, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0583713068876875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06374298798807496, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3079 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1326.4375, + "completions/mean_terminated_length": 1268.5833740234375, + "completions/min_length": 1102.0, + "completions/min_terminated_length": 1102.0, + "epoch": 0.7701925481370343, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8663716922880784, + "kl": 0.018096923828125, + "learning_rate": 2.374759515049232e-07, + "loss": 0.032, + "num_tokens": 139606266.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9736416339874268, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1293685399221953, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11242129025073994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3080 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1123.625, + "completions/mean_terminated_length": 1123.625, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.7704426106526632, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8279515731172, + "kl": 0.020050048828125, + "learning_rate": 2.3719344717542775e-07, + "loss": -0.0442, + "num_tokens": 139645620.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0610628128051758, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044517892072150025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06911901703387456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1057600358603626, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3081 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1304.5625, + "completions/mean_terminated_length": 1187.300048828125, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.770692673168292, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.975188983383075, + "kl": 0.0167236328125, + "learning_rate": 2.36911181194127e-07, + "loss": 0.0378, + "num_tokens": 139694525.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7475026249885559, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06051431186615263, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1695524877541303, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3082 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1294.5, + "completions/mean_terminated_length": 1247.0770263671875, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.770942735683921, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3859184087711176, + "kl": 0.020751953125, + "learning_rate": 2.366291537760983e-07, + "loss": -0.0289, + "num_tokens": 139748149.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0638489723205566, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16022650089316362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2089511280477232, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3083 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1389.0, + "completions/mean_length": 1267.25, + "completions/mean_terminated_length": 1213.5384521484375, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.7711927981995499, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9112115934121525, + "kl": 0.019073486328125, + "learning_rate": 2.363473651362371e-07, + "loss": 0.0342, + "num_tokens": 139794113.0, + "reward": 0.0, + "reward_std": 0.9863105416297913, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08629601929303965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14599610464813417, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3084 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1055.1875, + "completions/mean_terminated_length": 1055.1875, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.7714428607151788, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9987184667765967, + "kl": 0.0170440673828125, + "learning_rate": 2.360658154892569e-07, + "loss": -0.0327, + "num_tokens": 139829396.0, + "reward": 0.0, + "reward_std": 0.9509248733520508, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07579453645312362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03708276865238698, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14298407059684812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3085 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1301.5, + "completions/mean_terminated_length": 1211.272705078125, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.7716929232308077, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.568429379638132, + "kl": 0.00905609130859375, + "learning_rate": 2.3578450504968933e-07, + "loss": -0.0157, + "num_tokens": 139873468.0, + "reward": 0.0, + "reward_std": 0.942929744720459, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012192238523296713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026330704538590485, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3086 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1102.0, + "completions/max_terminated_length": 1102.0, + "completions/mean_length": 857.875, + "completions/mean_terminated_length": 857.875, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.7719429857464366, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7218282573509365, + "kl": 0.0164947509765625, + "learning_rate": 2.3550343403188343e-07, + "loss": -0.0516, + "num_tokens": 139913618.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0340756177902222, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007839214452741761, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10270104224763131, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3087 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1459.8125, + "completions/mean_terminated_length": 1285.666748046875, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.7721930482620655, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.736147834342746, + "kl": 0.0164642333984375, + "learning_rate": 2.35222602650006e-07, + "loss": 0.0203, + "num_tokens": 139969479.0, + "reward": 0.0, + "reward_std": 0.6911581158638, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.26379204751601354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2758844786873949, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1355373393953503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3088 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1349.625, + "completions/mean_terminated_length": 1232.6666259765625, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.7724431107776945, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0344050135030227, + "kl": 0.01898193359375, + "learning_rate": 2.3494201111804132e-07, + "loss": -0.0519, + "num_tokens": 140009305.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5520751476287842, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10123637998571357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08664703050361178, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3089 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1191.25, + "completions/mean_terminated_length": 1191.25, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.7726931732933233, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.267452059167474, + "kl": 0.0186614990234375, + "learning_rate": 2.3466165964979078e-07, + "loss": -0.0466, + "num_tokens": 140062797.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9233856201171875, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05421884902033858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1123741003096069, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3090 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1021.3125, + "completions/mean_terminated_length": 989.4000244140625, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.7729432358089522, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3420723938668213, + "kl": 0.015045166015625, + "learning_rate": 2.3438154845887284e-07, + "loss": -0.0443, + "num_tokens": 140103890.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8839049339294434, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0012623357911534117, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051670202419173396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3091 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1000.25, + "completions/mean_terminated_length": 1000.25, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.7731932983245812, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4842681690756936, + "kl": 0.0171661376953125, + "learning_rate": 2.3410167775872291e-07, + "loss": -0.0044, + "num_tokens": 140131694.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.936724066734314, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09910674767545996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1041751999092905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3092 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1183.375, + "completions/mean_terminated_length": 1138.1429443359375, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.77344336084021, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.452702076917666, + "kl": 0.017822265625, + "learning_rate": 2.3382204776259324e-07, + "loss": 0.0364, + "num_tokens": 140171836.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5765860080718994, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11698811823088019, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1857376996093616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3093 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1206.375, + "completions/mean_terminated_length": 1164.4285888671875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.7736934233558389, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2582280717402408, + "kl": 0.0172271728515625, + "learning_rate": 2.3354265868355246e-07, + "loss": 0.0622, + "num_tokens": 140220810.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6588790416717529, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.060383248547345766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07578502421473564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16594287281181147, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3094 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1402.0, + "completions/max_terminated_length": 1402.0, + "completions/mean_length": 1051.875, + "completions/mean_terminated_length": 1051.875, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.7739434858714679, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.701762728061946, + "kl": 0.011932373046875, + "learning_rate": 2.3326351073448573e-07, + "loss": -0.0152, + "num_tokens": 140257672.0, + "reward": 0.0, + "reward_std": 0.8970906734466553, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017481072035133424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10911876975533763, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3095 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1343.0, + "completions/mean_terminated_length": 1248.800048828125, + "completions/min_length": 812.0, + "completions/min_terminated_length": 812.0, + "epoch": 0.7741935483870968, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4632291353716336, + "kl": 0.0161590576171875, + "learning_rate": 2.329846041280949e-07, + "loss": 0.0199, + "num_tokens": 140306296.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7411212921142578, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.001832079961552463, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04633622345302878, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3096 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1079.0625, + "completions/mean_terminated_length": 1018.9285888671875, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.7744436109027257, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.89354550393684, + "kl": 0.01605224609375, + "learning_rate": 2.3270593907689708e-07, + "loss": 0.0474, + "num_tokens": 140343969.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0409399271011353, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0017563559925468102, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03942858991994365, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3097 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1067.3125, + "completions/mean_terminated_length": 1067.3125, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.7746936734183546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8172831022282683, + "kl": 0.0160980224609375, + "learning_rate": 2.3242751579322577e-07, + "loss": -0.034, + "num_tokens": 140392174.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7639655470848083, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026961856616260344, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18332574814650143, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3098 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1201.9375, + "completions/mean_terminated_length": 1023.1000366210938, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.7749437359339835, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5928350927119705, + "kl": 0.0131378173828125, + "learning_rate": 2.321493344892306e-07, + "loss": -0.0095, + "num_tokens": 140444237.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9732303619384766, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09854827113049278, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08186283041414212, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3099 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1103.25, + "completions/mean_terminated_length": 1103.25, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.7751937984496124, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.736061175541406, + "kl": 0.02044677734375, + "learning_rate": 2.318713953768763e-07, + "loss": 0.0013, + "num_tokens": 140485169.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9940143823623657, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07867576826172754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15306243463633312, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258098, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3100 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1180.875, + "completions/mean_terminated_length": 1159.60009765625, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.7754438609652413, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9881180265470744, + "kl": 0.015045166015625, + "learning_rate": 2.3159369866794318e-07, + "loss": -0.0738, + "num_tokens": 140530999.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0346760749816895, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0202255051296496, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04944581643720921, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3101 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1194.1875, + "completions/mean_terminated_length": 1173.800048828125, + "completions/min_length": 1036.0, + "completions/min_terminated_length": 1036.0, + "epoch": 0.7756939234808702, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6564890579271978, + "kl": 0.01010894775390625, + "learning_rate": 2.31316244574027e-07, + "loss": -0.0118, + "num_tokens": 140572826.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.747949481010437, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11910704365684503, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1319711676439974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3102 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1217.8125, + "completions/mean_terminated_length": 1199.0001220703125, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.7759439859964992, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.020614642905669, + "kl": 0.015869140625, + "learning_rate": 2.310390333065385e-07, + "loss": 0.0215, + "num_tokens": 140617431.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0483922958374023, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031273475913894185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04042577166354753, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.141878925953186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3103 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1233.0, + "completions/mean_terminated_length": 1111.6363525390625, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.776194048512128, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.875805489089078, + "kl": 0.016754150390625, + "learning_rate": 2.307620650767034e-07, + "loss": 0.0206, + "num_tokens": 140659095.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9731847047805786, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019530498207480245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053361791147210166, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116196, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3104 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1400.25, + "completions/mean_terminated_length": 1340.4000244140625, + "completions/min_length": 1236.0, + "completions/min_terminated_length": 1236.0, + "epoch": 0.7764441110277569, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5132557861161517, + "kl": 0.0132598876953125, + "learning_rate": 2.304853400955623e-07, + "loss": -0.008, + "num_tokens": 140718803.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0275650024414062, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015635784554176878, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07490774514667423, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3105 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1050.0, + "completions/mean_terminated_length": 1050.0, + "completions/min_length": 705.0, + "completions/min_terminated_length": 705.0, + "epoch": 0.7766941735433859, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3280367509940563, + "kl": 0.0179443359375, + "learning_rate": 2.3020885857397058e-07, + "loss": -0.0107, + "num_tokens": 140765243.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6816219091415405, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03964310683763073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09806032283998156, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3106 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1267.8125, + "completions/mean_terminated_length": 1087.2222900390625, + "completions/min_length": 920.0, + "completions/min_terminated_length": 920.0, + "epoch": 0.7769442360590147, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.433541572313009, + "kl": 0.0147247314453125, + "learning_rate": 2.2993262072259787e-07, + "loss": -0.025, + "num_tokens": 140813080.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.038700819015503, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019306813291930458, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0743766939705862, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11603000888978232, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3107 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1352.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1096.5, + "completions/mean_terminated_length": 1096.5, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.7771942985746436, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8663078463120857, + "kl": 0.0160980224609375, + "learning_rate": 2.296566267519282e-07, + "loss": -0.0211, + "num_tokens": 140855720.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0269923210144043, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09794845658329651, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09164343848467564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3108 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1329.0, + "completions/mean_terminated_length": 1272.0, + "completions/min_length": 1065.0, + "completions/min_terminated_length": 1065.0, + "epoch": 0.7774443610902726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.219128186421624, + "kl": 0.021087646484375, + "learning_rate": 2.2938087687225982e-07, + "loss": 0.0131, + "num_tokens": 140902688.0, + "reward": 0.0, + "reward_std": 0.9489836096763611, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02470623400231839, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09944642826830641, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3109 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1138.625, + "completions/mean_terminated_length": 1114.533447265625, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.7776944236059015, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9705663849013018, + "kl": 0.0131378173828125, + "learning_rate": 2.2910537129370506e-07, + "loss": 0.0063, + "num_tokens": 140932218.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.050583839416504, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15913454334658433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17475040196499683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3110 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1463.8125, + "completions/mean_terminated_length": 1417.2857666015625, + "completions/min_length": 1375.0, + "completions/min_terminated_length": 1375.0, + "epoch": 0.7779444861215303, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1163603782346696, + "kl": 0.0086669921875, + "learning_rate": 2.2883011022618985e-07, + "loss": 0.0016, + "num_tokens": 140990335.0, + "reward": 0.0, + "reward_std": 0.8141257166862488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027619728706072842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.044826049061904336, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3111 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1250.4375, + "completions/mean_terminated_length": 1192.84619140625, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.7781945486371593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0416316728506536, + "kl": 0.0162353515625, + "learning_rate": 2.28555093879454e-07, + "loss": -0.0033, + "num_tokens": 141043806.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0224816799163818, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06410043156931138, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08072716429478016, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3112 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1157.3125, + "completions/mean_terminated_length": 1001.5454711914062, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.7784446111527882, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.666017582800976, + "kl": 0.0131988525390625, + "learning_rate": 2.2828032246305092e-07, + "loss": 0.0604, + "num_tokens": 141089443.0, + "reward": 0.0, + "reward_std": 1.0667858123779297, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02556756921555981, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05697211774846252, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3113 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 1159.8125, + "completions/mean_terminated_length": 895.2222290039062, + "completions/min_length": 575.0, + "completions/min_terminated_length": 575.0, + "epoch": 0.7786946736684172, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.384157412110841, + "kl": 0.00891876220703125, + "learning_rate": 2.2800579618634747e-07, + "loss": -0.0047, + "num_tokens": 141133032.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9740424156188965, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0286292265948608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.051410029635049107, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14981470036162822, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3114 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1248.0625, + "completions/mean_terminated_length": 1164.0833740234375, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.778944736184046, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.793614312540597, + "kl": 0.01837158203125, + "learning_rate": 2.2773151525852312e-07, + "loss": 0.0054, + "num_tokens": 141178785.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9271921515464783, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043077534170378795, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0446677003053973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3115 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1101.25, + "completions/mean_terminated_length": 1101.25, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.7791947986996749, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.108268550859944, + "kl": 0.013519287109375, + "learning_rate": 2.2745747988857095e-07, + "loss": -0.0234, + "num_tokens": 141224469.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0622438192367554, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004627338306607586, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09789469054738785, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.047919685895217425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3116 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1208.8125, + "completions/mean_terminated_length": 1167.21435546875, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.7794448612153039, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8278440479699203, + "kl": 0.0157318115234375, + "learning_rate": 2.27183690285297e-07, + "loss": 0.0109, + "num_tokens": 141259122.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.029374361038208, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004844866445574107, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030853676804279005, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026001, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3117 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1106.625, + "completions/mean_terminated_length": 1106.625, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.7796949237309327, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6196731578310777, + "kl": 0.0157470703125, + "learning_rate": 2.2691014665731973e-07, + "loss": -0.0799, + "num_tokens": 141292708.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0174932479858398, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052906723633206205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0563798707433637, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3118 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 1022.0, + "completions/mean_terminated_length": 1022.0, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.7799449862465616, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.74218429680847, + "kl": 0.021240234375, + "learning_rate": 2.266368492130703e-07, + "loss": -0.0058, + "num_tokens": 141337860.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8891324400901794, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058103304213931645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08198893316806888, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3119 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1289.9375, + "completions/mean_terminated_length": 1275.933349609375, + "completions/min_length": 960.0, + "completions/min_terminated_length": 960.0, + "epoch": 0.7801950487621906, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1437254568045487, + "kl": 0.01727294921875, + "learning_rate": 2.2636379816079227e-07, + "loss": -0.0, + "num_tokens": 141387611.0, + "reward": 0.0, + "reward_std": 1.061630368232727, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05291257644368569, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11804166902557756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3120 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1405.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1056.0, + "completions/mean_terminated_length": 1056.0, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.7804451112778195, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.329200400205436, + "kl": 0.0132598876953125, + "learning_rate": 2.2609099370854146e-07, + "loss": -0.0906, + "num_tokens": 141427635.0, + "reward": 0.0, + "reward_std": 0.8885282278060913, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.042142970806390304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0669935582370283, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3121 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1216.9375, + "completions/mean_terminated_length": 1216.9375, + "completions/min_length": 961.0, + "completions/min_terminated_length": 961.0, + "epoch": 0.7806951737934483, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.237838902619618, + "kl": 0.01513671875, + "learning_rate": 2.2581843606418576e-07, + "loss": 0.001, + "num_tokens": 141465482.0, + "reward": 0.0, + "reward_std": 0.7738313674926758, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00917892960332818, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13949987882121415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689055, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3122 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1094.75, + "completions/mean_terminated_length": 1067.7333984375, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.7809452363090773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0008450350932, + "kl": 0.01959228515625, + "learning_rate": 2.2554612543540498e-07, + "loss": 0.0298, + "num_tokens": 141516062.0, + "reward": 0.0, + "reward_std": 0.9720662236213684, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02974056939821558, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06948404776209717, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3123 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1247.0, + "completions/mean_terminated_length": 1210.857177734375, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.7811952988247062, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.213290634610443, + "kl": 0.019561767578125, + "learning_rate": 2.25274062029691e-07, + "loss": -0.0314, + "num_tokens": 141572430.0, + "reward": 0.0, + "reward_std": 0.9156955480575562, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00037095885526235335, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0995810195374684, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3124 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1421.0, + "completions/max_terminated_length": 1421.0, + "completions/mean_length": 1085.6875, + "completions/mean_terminated_length": 1085.6875, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.781445361340335, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4022655604697425, + "kl": 0.0169525146484375, + "learning_rate": 2.2500224605434692e-07, + "loss": -0.0719, + "num_tokens": 141604761.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8711904287338257, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09873657900531337, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1524457076054459, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3125 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1222.0, + "completions/mean_terminated_length": 1182.2857666015625, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.781695423855964, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5261286932157385, + "kl": 0.02484130859375, + "learning_rate": 2.247306777164875e-07, + "loss": -0.0628, + "num_tokens": 141658929.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0253026485443115, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04089279257741166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09084326149356381, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3126 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1050.0, + "completions/mean_terminated_length": 1020.0000610351562, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.7819454863715929, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3678002326909833, + "kl": 0.019287109375, + "learning_rate": 2.2445935722303882e-07, + "loss": -0.0099, + "num_tokens": 141704017.0, + "reward": 0.0, + "reward_std": 0.7570704817771912, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013553367959171274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09210244381271897, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3127 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1144.6875, + "completions/mean_terminated_length": 1144.6875, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.7821955488872218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0060557882852197, + "kl": 0.0207366943359375, + "learning_rate": 2.2418828478073803e-07, + "loss": -0.0272, + "num_tokens": 141753396.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0438638925552368, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08712486354147402, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12674824263936774, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476836, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3128 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 1092.375, + "completions/mean_terminated_length": 1034.1429443359375, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.7824456114028507, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9959947795208706, + "kl": 0.0142974853515625, + "learning_rate": 2.2391746059613335e-07, + "loss": -0.0157, + "num_tokens": 141786130.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9748282432556152, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04441638643555737, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07335370233992354, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036347, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3129 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1376.25, + "completions/mean_terminated_length": 1335.0, + "completions/min_length": 1156.0, + "completions/min_terminated_length": 1156.0, + "epoch": 0.7826956739184796, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0361215149506515, + "kl": 0.021453857421875, + "learning_rate": 2.236468848755836e-07, + "loss": -0.0002, + "num_tokens": 141838014.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.902534008026123, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006780780662539913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08205118824778404, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3130 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1275.0, + "completions/mean_length": 1098.4375, + "completions/mean_terminated_length": 1071.666748046875, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.7829457364341085, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8762372199466335, + "kl": 0.015228271484375, + "learning_rate": 2.233765578252589e-07, + "loss": -0.0108, + "num_tokens": 141870421.0, + "reward": 9.313225746154785e-09, + "reward_std": 1.0109643936157227, + "rewards/wordcountpos_reward_GEOBench/mean": 9.313225746154785e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04085727769177524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09286995120984347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3131 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1272.125, + "completions/mean_terminated_length": 1219.5384521484375, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.7831957989497375, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5807613601209574, + "kl": 0.02197265625, + "learning_rate": 2.231064796511393e-07, + "loss": -0.0292, + "num_tokens": 141920359.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0024752616882324, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1164343330019168, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16869150302831726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3132 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1207.25, + "completions/mean_terminated_length": 1139.6923828125, + "completions/min_length": 936.0, + "completions/min_terminated_length": 936.0, + "epoch": 0.7834458614653663, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.388342037899947, + "kl": 0.0196533203125, + "learning_rate": 2.2283665055901512e-07, + "loss": 0.0006, + "num_tokens": 141963211.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7664536833763123, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01933388933437418, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03282054466052182, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3133 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1288.0625, + "completions/mean_terminated_length": 1217.416748046875, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.7836959239809953, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5480827689403043, + "kl": 0.0142364501953125, + "learning_rate": 2.2256707075448753e-07, + "loss": -0.0459, + "num_tokens": 142020156.0, + "reward": 0.0, + "reward_std": 0.5042341947555542, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.050684843969423764, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07220364185429919, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0824396524513313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3134 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1146.625, + "completions/mean_terminated_length": 1123.0667724609375, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.7839459864966242, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2920539443023378, + "kl": 0.021270751953125, + "learning_rate": 2.2229774044296724e-07, + "loss": -0.0054, + "num_tokens": 142065846.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0148265361785889, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02666766254657847, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16912827212970116, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3135 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1237.125, + "completions/mean_terminated_length": 1219.60009765625, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.784196049012253, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0220128643232407, + "kl": 0.0189056396484375, + "learning_rate": 2.2202865982967496e-07, + "loss": -0.0324, + "num_tokens": 142111616.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9633963704109192, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07488432336750178, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09390590343880265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3136 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1291.125, + "completions/mean_terminated_length": 1196.181884765625, + "completions/min_length": 352.0, + "completions/min_terminated_length": 352.0, + "epoch": 0.784446111527882, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.758368633052664, + "kl": 0.022857666015625, + "learning_rate": 2.217598291196412e-07, + "loss": -0.104, + "num_tokens": 142155658.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5864871740341187, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0801314961456984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1770762452463528, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3137 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1209.0, + "completions/mean_terminated_length": 1189.60009765625, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.7846961740435109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8802252845686485, + "kl": 0.018463134765625, + "learning_rate": 2.2149124851770602e-07, + "loss": 0.0014, + "num_tokens": 142193202.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9145612716674805, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03805509372599068, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07940950433705883, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1085254706406647, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3138 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1162.4375, + "completions/mean_terminated_length": 1049.916748046875, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.7849462365591398, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.350279876484063, + "kl": 0.02490234375, + "learning_rate": 2.2122291822851892e-07, + "loss": 0.0066, + "num_tokens": 142229337.0, + "reward": -1.862645149230957e-09, + "reward_std": 1.0688577890396118, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005372131847339209, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0689943517503309, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3139 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1142.8125, + "completions/mean_terminated_length": 980.45458984375, + "completions/min_length": 716.0, + "completions/min_terminated_length": 716.0, + "epoch": 0.7851962990747687, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.709532379013106, + "kl": 0.0160675048828125, + "learning_rate": 2.209548384565385e-07, + "loss": 0.0047, + "num_tokens": 142267214.0, + "reward": 0.0, + "reward_std": 0.7410358190536499, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07745456100519076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1045731993438538, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3140 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1269.125, + "completions/mean_terminated_length": 1215.84619140625, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.7854463615903976, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0479486492305883, + "kl": 0.0093994140625, + "learning_rate": 2.2068700940603297e-07, + "loss": 0.0481, + "num_tokens": 142313576.0, + "reward": 0.0, + "reward_std": 0.9494744539260864, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10294111678355598, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07397944732868468, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3141 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1072.6875, + "completions/mean_terminated_length": 1072.6875, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.7856964241060265, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1391994783264363, + "kl": 0.0106964111328125, + "learning_rate": 2.2041943128107897e-07, + "loss": -0.0469, + "num_tokens": 142350843.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8932387828826904, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02198987875681524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11937288910910554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3142 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1176.0, + "completions/mean_length": 1097.9375, + "completions/mean_terminated_length": 963.9166870117188, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.7859464866216554, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.467509160777894, + "kl": 0.020416259765625, + "learning_rate": 2.201521042855621e-07, + "loss": -0.0657, + "num_tokens": 142393466.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5027040243148804, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0011529277527924115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12836007432995503, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3143 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1220.0, + "completions/max_terminated_length": 1220.0, + "completions/mean_length": 1071.125, + "completions/mean_terminated_length": 1071.125, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.7861965491372843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.767120591442427, + "kl": 0.0170135498046875, + "learning_rate": 2.1988502862317676e-07, + "loss": -0.0144, + "num_tokens": 142442428.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9196226596832275, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03742020979775437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1227015904412681, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567837, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3144 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1034.0, + "completions/mean_terminated_length": 1002.9334106445312, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.7864466116529132, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6097048139666863, + "kl": 0.020263671875, + "learning_rate": 2.1961820449742558e-07, + "loss": -0.0356, + "num_tokens": 142482924.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.055826187133789, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14820908021513177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12191903948531894, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1465024333004847, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3145 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1156.375, + "completions/mean_terminated_length": 1156.375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.7866966741685422, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1168787002239524, + "kl": 0.01971435546875, + "learning_rate": 2.1935163211161984e-07, + "loss": 0.0095, + "num_tokens": 142517482.0, + "reward": 0.0, + "reward_std": 0.3894825875759125, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05353437005519248, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05742025659094507, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3146 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1349.875, + "completions/mean_terminated_length": 1199.75, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.786946736684171, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.909907481660971, + "kl": 0.0164337158203125, + "learning_rate": 2.1908531166887854e-07, + "loss": -0.0164, + "num_tokens": 142563784.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8045768737792969, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038924169965211114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08002711015669282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3147 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1242.0, + "completions/mean_terminated_length": 1156.0, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.7871967991997999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.352832477232744, + "kl": 0.01947021484375, + "learning_rate": 2.1881924337212932e-07, + "loss": 0.0311, + "num_tokens": 142600192.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9132756590843201, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00167367808699561, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07628591083863434, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3148 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1213.0, + "completions/mean_length": 1018.125, + "completions/mean_terminated_length": 986.0000610351562, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.7874468617154289, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.103730795124191, + "kl": 0.014312744140625, + "learning_rate": 2.1855342742410736e-07, + "loss": -0.0454, + "num_tokens": 142635010.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6687207818031311, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22587171793637625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2130482181816886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14497764834110988, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3149 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1330.75, + "completions/mean_terminated_length": 1253.8182373046875, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.7876969242310577, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3993834338122277, + "kl": 0.0174560546875, + "learning_rate": 2.1828786402735555e-07, + "loss": 0.0032, + "num_tokens": 142687486.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9331967830657959, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08351661936058752, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18325340441520357, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3150 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1261.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 979.625, + "completions/mean_terminated_length": 979.625, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.7879469867466866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2874245060159217, + "kl": 0.020233154296875, + "learning_rate": 2.1802255338422403e-07, + "loss": -0.0129, + "num_tokens": 142722976.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7961347103118896, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09070038530736908, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06931874039886982, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3151 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1080.5625, + "completions/mean_terminated_length": 1020.6428833007812, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.7881970492623156, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.082275510325419, + "kl": 0.0156707763671875, + "learning_rate": 2.177574956968712e-07, + "loss": -0.0276, + "num_tokens": 142753649.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.73228520154953, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015137529624113442, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03973572420784079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12988598989256067, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3152 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1298.0, + "completions/mean_length": 1329.4375, + "completions/mean_terminated_length": 1110.1429443359375, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.7884471117779445, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.471299897117188, + "kl": 0.0176849365234375, + "learning_rate": 2.1749269116726184e-07, + "loss": 0.0051, + "num_tokens": 142806008.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.031025767326355, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09284164202341658, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14696029023933577, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3153 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1232.4375, + "completions/mean_terminated_length": 1194.21435546875, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.7886971742935734, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.24207623865605, + "kl": 0.01922607421875, + "learning_rate": 2.172281399971683e-07, + "loss": -0.0285, + "num_tokens": 142845543.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.803665280342102, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16305162924300523, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22334098982075185, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3154 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1294.625, + "completions/mean_terminated_length": 1280.933349609375, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.7889472368092023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3678308359277676, + "kl": 0.0118865966796875, + "learning_rate": 2.1696384238817002e-07, + "loss": 0.006, + "num_tokens": 142888025.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0336265563964844, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10149466047714674, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0896415254695089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3155 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1236.75, + "completions/mean_terminated_length": 1236.75, + "completions/min_length": 1083.0, + "completions/min_terminated_length": 1083.0, + "epoch": 0.7891972993248312, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2401366480368705, + "kl": 0.00804901123046875, + "learning_rate": 2.1669979854165267e-07, + "loss": -0.0198, + "num_tokens": 142938413.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0674614906311035, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0006218542439820653, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10077904633164853, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3156 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1033.875, + "completions/mean_terminated_length": 967.2857666015625, + "completions/min_length": 386.0, + "completions/min_terminated_length": 386.0, + "epoch": 0.7894473618404602, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6474614470652886, + "kl": 0.017120361328125, + "learning_rate": 2.1643600865880896e-07, + "loss": -0.0704, + "num_tokens": 142971123.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9793280959129333, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035351877383931495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05085423390267886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17126976771553507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3157 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1406.0625, + "completions/mean_terminated_length": 1249.5, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.789697424356089, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6728291637998085, + "kl": 0.0192413330078125, + "learning_rate": 2.1617247294063798e-07, + "loss": -0.018, + "num_tokens": 143029124.0, + "reward": 0.0, + "reward_std": 0.6019368171691895, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04746154242285027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09796591285205866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3158 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1420.625, + "completions/mean_terminated_length": 1341.25, + "completions/min_length": 1152.0, + "completions/min_terminated_length": 1152.0, + "epoch": 0.7899474868717179, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1278205340509424, + "kl": 0.0172882080078125, + "learning_rate": 2.159091915879453e-07, + "loss": -0.0212, + "num_tokens": 143082246.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9891825914382935, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11171792259596441, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030422103078154, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3159 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1313.3125, + "completions/mean_terminated_length": 1251.0833740234375, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.7901975493873469, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.257422271991821, + "kl": 0.017364501953125, + "learning_rate": 2.156461648013426e-07, + "loss": -0.0269, + "num_tokens": 143134347.0, + "reward": 0.0, + "reward_std": 0.8425611257553101, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05605589464089328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11918730579504315, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15098442401882486, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3160 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1328.125, + "completions/mean_terminated_length": 1250.0, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.7904476119029757, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1954579545237096, + "kl": 0.01446533203125, + "learning_rate": 2.153833927812474e-07, + "loss": -0.0281, + "num_tokens": 143178453.0, + "reward": 0.0, + "reward_std": 1.0083447694778442, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018306422264755026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07431460655805004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3161 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 997.5, + "completions/mean_terminated_length": 964.0000610351562, + "completions/min_length": 553.0, + "completions/min_terminated_length": 553.0, + "epoch": 0.7906976744186046, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.727033830849626, + "kl": 0.02752685546875, + "learning_rate": 2.151208757278833e-07, + "loss": -0.0395, + "num_tokens": 143220437.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.037672758102417, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13892844209106048, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1829223321409405, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3162 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1265.0625, + "completions/mean_terminated_length": 1231.5, + "completions/min_length": 1009.0, + "completions/min_terminated_length": 1009.0, + "epoch": 0.7909477369342336, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.59972235793177, + "kl": 0.02740478515625, + "learning_rate": 2.1485861384127956e-07, + "loss": -0.0424, + "num_tokens": 143255958.0, + "reward": 0.0, + "reward_std": 0.8862378597259521, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04024686571392534, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06240769711445003, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3163 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 981.125, + "completions/mean_terminated_length": 981.125, + "completions/min_length": 510.0, + "completions/min_terminated_length": 510.0, + "epoch": 0.7911977994498625, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.295560901411648, + "kl": 0.0177001953125, + "learning_rate": 2.1459660732127105e-07, + "loss": 0.0373, + "num_tokens": 143285960.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9460959434509277, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03731616321497193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1367992771069978, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3164 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1241.1875, + "completions/mean_terminated_length": 1204.21435546875, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.7914478619654913, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7871598376802615, + "kl": 0.017822265625, + "learning_rate": 2.1433485636749787e-07, + "loss": -0.0043, + "num_tokens": 143320771.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9838777780532837, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.041553677073196814, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10765190010390557, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3165 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1365.25, + "completions/mean_terminated_length": 1320.3333740234375, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.7916979244811203, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.122871743809199, + "kl": 0.020263671875, + "learning_rate": 2.1407336117940584e-07, + "loss": 0.0121, + "num_tokens": 143370791.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8444160223007202, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05492321048074916, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10652307883238271, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3166 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1195.5, + "completions/mean_terminated_length": 1094.0, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.7919479869967492, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.655128606854681, + "kl": 0.015960693359375, + "learning_rate": 2.1381212195624554e-07, + "loss": 0.0092, + "num_tokens": 143416975.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0002421140670776, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.035078094466879726, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1137638278214599, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3167 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1229.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 994.0, + "completions/mean_terminated_length": 994.0, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.792198049512378, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.825965640717729, + "kl": 0.0182952880859375, + "learning_rate": 2.1355113889707227e-07, + "loss": -0.0279, + "num_tokens": 143451047.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8616598844528198, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07293389692777551, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.29594314794118814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3168 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 1252.5625, + "completions/mean_terminated_length": 1060.111083984375, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.792448112028007, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.289900200328695, + "kl": 0.020843505859375, + "learning_rate": 2.1329041220074674e-07, + "loss": 0.0276, + "num_tokens": 143511560.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9759418964385986, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06044196567550103, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0688902569057086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3169 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1277.0, + "completions/mean_terminated_length": 1103.5555419921875, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.7926981745436359, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.885646420917689, + "kl": 0.015380859375, + "learning_rate": 2.1302994206593394e-07, + "loss": 0.0059, + "num_tokens": 143565312.0, + "reward": -1.30385160446167e-08, + "reward_std": 1.0675444602966309, + "rewards/wordcountpos_reward_GEOBench/mean": -1.30385160446167e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1545040784726457, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10557696730841536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3170 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1119.875, + "completions/mean_terminated_length": 993.1666870117188, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.7929482370592649, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.130158634534938, + "kl": 0.01739501953125, + "learning_rate": 2.1276972869110343e-07, + "loss": -0.0041, + "num_tokens": 143618430.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9175781607627869, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.053236921334463776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0876368469865468, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382576, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3171 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1212.75, + "completions/mean_terminated_length": 1171.71435546875, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.7931982995748937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.081587434737391, + "kl": 0.0179901123046875, + "learning_rate": 2.125097722745291e-07, + "loss": -0.0111, + "num_tokens": 143655866.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0647214651107788, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025744255115061232, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04932901822048628, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16865480854231357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3172 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1205.125, + "completions/mean_terminated_length": 1163.0, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.7934483620905226, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0775713550670085, + "kl": 0.016082763671875, + "learning_rate": 2.1225007301428938e-07, + "loss": -0.0037, + "num_tokens": 143709260.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.9004783630371094, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12465439797227042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1782901607707489, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11021863793455329, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3173 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1265.0, + "completions/max_terminated_length": 1265.0, + "completions/mean_length": 1083.9375, + "completions/mean_terminated_length": 1083.9375, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.7936984246061516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2084200969303804, + "kl": 0.01934814453125, + "learning_rate": 2.1199063110826616e-07, + "loss": 0.0125, + "num_tokens": 143751331.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8657114505767822, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0139550768619504, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11481125994989683, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3174 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1385.75, + "completions/mean_terminated_length": 1238.857177734375, + "completions/min_length": 869.0, + "completions/min_terminated_length": 869.0, + "epoch": 0.7939484871217805, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8202047667940935, + "kl": 0.024383544921875, + "learning_rate": 2.117314467541455e-07, + "loss": 0.0065, + "num_tokens": 143809807.0, + "reward": 0.0, + "reward_std": 0.5955390334129333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03384877392047597, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08896089797375616, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3175 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1086.8125, + "completions/mean_terminated_length": 1086.8125, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.7941985496374093, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1103364216628093, + "kl": 0.015625, + "learning_rate": 2.1147252014941755e-07, + "loss": 0.022, + "num_tokens": 143841772.0, + "reward": 0.0, + "reward_std": 0.5176241993904114, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17011516861825748, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08677842709234852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3176 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1058.0, + "completions/max_terminated_length": 1058.0, + "completions/mean_length": 945.9375, + "completions/mean_terminated_length": 945.9375, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.7944486121530383, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.838259637907442, + "kl": 0.012287139892578125, + "learning_rate": 2.112138514913756e-07, + "loss": 0.0118, + "num_tokens": 143880219.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9468961954116821, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011227090110150903, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04860935273689597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3177 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1184.4375, + "completions/mean_terminated_length": 1111.615478515625, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.7946986746686672, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9359621303801933, + "kl": 0.0176849365234375, + "learning_rate": 2.109554409771167e-07, + "loss": 0.0134, + "num_tokens": 143928578.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9284367561340332, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027208730055975832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03794594963505193, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3178 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1230.6875, + "completions/mean_terminated_length": 961.375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.794948737184296, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2578567346158924, + "kl": 0.018463134765625, + "learning_rate": 2.1069728880354094e-07, + "loss": -0.0457, + "num_tokens": 143971877.0, + "reward": 0.0, + "reward_std": 0.9290722608566284, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11894546085892303, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09815911858191395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3179 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1281.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 989.0625, + "completions/mean_terminated_length": 989.0625, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.795198799699925, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.025080802511638, + "kl": 0.020111083984375, + "learning_rate": 2.1043939516735171e-07, + "loss": -0.0013, + "num_tokens": 144015510.0, + "reward": 0.0, + "reward_std": 0.44862520694732666, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14305381784904306, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09725071687833411, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3180 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1192.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 955.125, + "completions/mean_terminated_length": 955.125, + "completions/min_length": 577.0, + "completions/min_terminated_length": 577.0, + "epoch": 0.7954488622155539, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2473908070022843, + "kl": 0.015777587890625, + "learning_rate": 2.101817602650554e-07, + "loss": 0.0548, + "num_tokens": 144059008.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.24584731459617615, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005729351964581943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26422697935360484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15962919996504865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3181 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1447.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1104.8125, + "completions/mean_terminated_length": 1104.8125, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.7956989247311828, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9433641251840164, + "kl": 0.0205078125, + "learning_rate": 2.0992438429296115e-07, + "loss": -0.0239, + "num_tokens": 144098549.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9078606367111206, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0016922523637831431, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0742763541361191, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3182 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1085.75, + "completions/mean_terminated_length": 1085.75, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.7959489872468117, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.267390887414894, + "kl": 0.01739501953125, + "learning_rate": 2.096672674471811e-07, + "loss": -0.0095, + "num_tokens": 144137137.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0557007789611816, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0015824010178899558, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05730614077206509, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3183 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1443.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1204.1875, + "completions/mean_terminated_length": 1204.1875, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.7961990497624406, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.688225063952256, + "kl": 0.01322174072265625, + "learning_rate": 2.0941040992362968e-07, + "loss": -0.0183, + "num_tokens": 144173084.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.005321741104126, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008228566230054177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08903259941804825, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3184 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1194.5625, + "completions/mean_terminated_length": 1174.2000732421875, + "completions/min_length": 860.0, + "completions/min_terminated_length": 860.0, + "epoch": 0.7964491122780695, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.026739526972555, + "kl": 0.017120361328125, + "learning_rate": 2.0915381191802386e-07, + "loss": -0.0275, + "num_tokens": 144217669.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0408138036727905, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0863643744986506, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061823634283853804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3185 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1255.1875, + "completions/mean_terminated_length": 1220.21435546875, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.7966991747936985, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6105890237558835, + "kl": 0.01381683349609375, + "learning_rate": 2.088974736258824e-07, + "loss": -0.0078, + "num_tokens": 144275648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.992868185043335, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15719768855202415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23914045129129743, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3186 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1318.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 1049.125, + "completions/mean_terminated_length": 1049.125, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.7969492373093273, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7682161532120393, + "kl": 0.024505615234375, + "learning_rate": 2.0864139524252695e-07, + "loss": -0.0552, + "num_tokens": 144314458.0, + "reward": 0.0, + "reward_std": 1.025489091873169, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03570677982972262, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04594968015318742, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12881223774390613, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3187 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1114.5625, + "completions/mean_terminated_length": 1059.5, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.7971992998249562, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1603076954949025, + "kl": 0.0208740234375, + "learning_rate": 2.083855769630806e-07, + "loss": 0.0001, + "num_tokens": 144375355.0, + "reward": 0.0, + "reward_std": 0.850922703742981, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006487659923616128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06503751027359121, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3188 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1299.4375, + "completions/mean_terminated_length": 1208.272705078125, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.7974493623405852, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7216243154145285, + "kl": 0.01483154296875, + "learning_rate": 2.0813001898246829e-07, + "loss": -0.0038, + "num_tokens": 144431402.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8415766954421997, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011106046779877992, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06825818224108442, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3189 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 920.1875, + "completions/mean_terminated_length": 920.1875, + "completions/min_length": 564.0, + "completions/min_terminated_length": 564.0, + "epoch": 0.797699424856214, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1128557487394333, + "kl": 0.01483154296875, + "learning_rate": 2.0787472149541702e-07, + "loss": 0.0584, + "num_tokens": 144461645.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8699432611465454, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05677670340296387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0604154561122279, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13080944580232393, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3190 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1409.0, + "completions/mean_length": 1215.5, + "completions/mean_terminated_length": 1120.666748046875, + "completions/min_length": 898.0, + "completions/min_terminated_length": 898.0, + "epoch": 0.797949487371843, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.836106313288438, + "kl": 0.02099609375, + "learning_rate": 2.0761968469645462e-07, + "loss": 0.0203, + "num_tokens": 144516981.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0221346616744995, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1993060157745387, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14387147532761135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3191 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1193.4375, + "completions/mean_terminated_length": 1122.6923828125, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.7981995498874719, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.01322269512708, + "kl": 0.018157958984375, + "learning_rate": 2.0736490877991087e-07, + "loss": -0.0706, + "num_tokens": 144553860.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9782369136810303, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028962237676310995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14689350438964074, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3192 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1289.4375, + "completions/mean_terminated_length": 1193.727294921875, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.7984496124031008, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6939409902624347, + "kl": 0.013519287109375, + "learning_rate": 2.0711039393991625e-07, + "loss": -0.0179, + "num_tokens": 144610171.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.023606777191162, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06760863656782412, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08137488243643112, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3193 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1358.125, + "completions/mean_terminated_length": 1175.71435546875, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.7986996749187297, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5715477837150287, + "kl": 0.0159454345703125, + "learning_rate": 2.06856140370403e-07, + "loss": -0.1023, + "num_tokens": 144666005.0, + "reward": 0.0, + "reward_std": 0.9789654016494751, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014002679982691089, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1323868410639956, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.107496769977314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3194 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1162.1875, + "completions/mean_terminated_length": 1084.2308349609375, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.7989497374343586, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.210667769475688, + "kl": 0.0085601806640625, + "learning_rate": 2.0660214826510373e-07, + "loss": -0.0026, + "num_tokens": 144698624.0, + "reward": 0.0, + "reward_std": 1.0589599609375, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03349943175151475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04328893349004597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505566, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3195 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1311.1875, + "completions/mean_terminated_length": 1122.375, + "completions/min_length": 889.0, + "completions/min_terminated_length": 889.0, + "epoch": 0.7991997999499875, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.93503039644484, + "kl": 0.01971435546875, + "learning_rate": 2.063484178175519e-07, + "loss": 0.0057, + "num_tokens": 144755667.0, + "reward": 0.0, + "reward_std": 0.43114322423934937, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04363647326204945, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05478464482801713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066928, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3196 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1024.0, + "completions/max_terminated_length": 1024.0, + "completions/mean_length": 821.5, + "completions/mean_terminated_length": 821.5, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.7994498624656164, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.024463588522192, + "kl": 0.016387939453125, + "learning_rate": 2.0609494922108184e-07, + "loss": -0.0187, + "num_tokens": 144794099.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.731658935546875, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03057140664876796, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26502710837907045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3197 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1311.1875, + "completions/mean_terminated_length": 1197.9000244140625, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.7996999249812453, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6431834936485528, + "kl": 0.0153656005859375, + "learning_rate": 2.0584174266882808e-07, + "loss": -0.0022, + "num_tokens": 144842510.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6004627346992493, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21845488940006816, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20548451321226235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3198 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1376.5, + "completions/mean_terminated_length": 1320.3636474609375, + "completions/min_length": 1175.0, + "completions/min_terminated_length": 1175.0, + "epoch": 0.7999499874968742, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0906220415218884, + "kl": 0.01959228515625, + "learning_rate": 2.0558879835372555e-07, + "loss": 0.018, + "num_tokens": 144896510.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0688650608062744, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04710569934589606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0685531119518539, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3199 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 1054.375, + "completions/mean_terminated_length": 990.71435546875, + "completions/min_length": 569.0, + "completions/min_terminated_length": 569.0, + "epoch": 0.8002000500125032, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4724899352992415, + "kl": 0.04425048828125, + "learning_rate": 2.053361164685094e-07, + "loss": 0.0175, + "num_tokens": 144942900.0, + "reward": 0.0, + "reward_std": 0.8924879431724548, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0885192929795037, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10667882982098856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3200 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1242.8125, + "completions/mean_terminated_length": 1242.8125, + "completions/min_length": 527.0, + "completions/min_terminated_length": 527.0, + "epoch": 0.800450112528132, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.271199539238548, + "kl": 0.01641845703125, + "learning_rate": 2.0508369720571503e-07, + "loss": -0.0239, + "num_tokens": 144987753.0, + "reward": 0.0, + "reward_std": 0.992772102355957, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046477162505427175, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11238213751332747, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3201 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1180.5, + "completions/mean_terminated_length": 1159.2000732421875, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.8007001750437609, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0479201592752525, + "kl": 0.020294189453125, + "learning_rate": 2.048315407576774e-07, + "loss": 0.0108, + "num_tokens": 145039241.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7281526327133179, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05004062181601887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0807269978992697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3202 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1171.0, + "completions/mean_terminated_length": 1149.0667724609375, + "completions/min_length": 1016.0, + "completions/min_terminated_length": 1016.0, + "epoch": 0.8009502375593899, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1275218870600625, + "kl": 0.0199127197265625, + "learning_rate": 2.0457964731653133e-07, + "loss": -0.0042, + "num_tokens": 145072905.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9732587933540344, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0797276722780427, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1261772548175525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3203 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1482.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1297.6875, + "completions/mean_terminated_length": 1297.6875, + "completions/min_length": 1112.0, + "completions/min_terminated_length": 1112.0, + "epoch": 0.8012003000750187, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2080487279152807, + "kl": 0.0150146484375, + "learning_rate": 2.0432801707421127e-07, + "loss": -0.0354, + "num_tokens": 145124188.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9938091039657593, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.047754336805404, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11382223401882757, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857662, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3204 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1169.25, + "completions/mean_terminated_length": 1169.25, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.8014503625906476, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4873959199854365, + "kl": 0.022735595703125, + "learning_rate": 2.040766502224511e-07, + "loss": -0.0149, + "num_tokens": 145170888.0, + "reward": 0.0, + "reward_std": 0.8605424165725708, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01918318000082682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04364056551432824, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655644, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3205 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1348.0, + "completions/mean_terminated_length": 1229.77783203125, + "completions/min_length": 1107.0, + "completions/min_terminated_length": 1107.0, + "epoch": 0.8017004251062766, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.651048370266668, + "kl": 0.015838623046875, + "learning_rate": 2.03825546952784e-07, + "loss": -0.0195, + "num_tokens": 145217256.0, + "reward": 0.0, + "reward_std": 0.9551332592964172, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009359582485281214, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.049983742517692574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3206 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1454.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1120.6875, + "completions/mean_terminated_length": 1120.6875, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.8019504876219055, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.872612772415288, + "kl": 0.0160369873046875, + "learning_rate": 2.0357470745654212e-07, + "loss": -0.0192, + "num_tokens": 145263963.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7298794984817505, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0029431639344473645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.011772655737789458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3207 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1163.625, + "completions/mean_terminated_length": 1141.2000732421875, + "completions/min_length": 962.0, + "completions/min_terminated_length": 962.0, + "epoch": 0.8022005501375343, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0088810592961153, + "kl": 0.015472412109375, + "learning_rate": 2.033241319248573e-07, + "loss": -0.0391, + "num_tokens": 145314381.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0175529718399048, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03635117102994749, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08579243004692262, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3208 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1292.625, + "completions/mean_terminated_length": 1263.0, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.8024506126531633, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7213740722586857, + "kl": 0.0162200927734375, + "learning_rate": 2.0307382054865934e-07, + "loss": 0.0313, + "num_tokens": 145357055.0, + "reward": 0.0, + "reward_std": 0.8605424165725708, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02959258801574812, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0553321981166487, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262934, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3209 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1344.0, + "completions/mean_length": 1172.0625, + "completions/mean_terminated_length": 1096.3846435546875, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.8027006751687922, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2230882318455665, + "kl": 0.0213623046875, + "learning_rate": 2.0282377351867722e-07, + "loss": -0.0096, + "num_tokens": 145401232.0, + "reward": 0.0, + "reward_std": 0.5246283411979675, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09000244138913852, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13939115715648523, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3210 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1111.625, + "completions/mean_terminated_length": 1085.7333984375, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.8029507376844212, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.535007115717088, + "kl": 0.01332855224609375, + "learning_rate": 2.0257399102543861e-07, + "loss": -0.0333, + "num_tokens": 145447482.0, + "reward": 0.0, + "reward_std": 0.8483057022094727, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13929865594749558, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.25274986789245973, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06309898162000306, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3211 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1169.625, + "completions/mean_terminated_length": 1147.60009765625, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.80320080020005, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9861651949419437, + "kl": 0.0167999267578125, + "learning_rate": 2.0232447325926948e-07, + "loss": -0.0404, + "num_tokens": 145486220.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7228039503097534, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20501300022846963, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26368873599215026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087679, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3212 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1307.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 860.0, + "completions/mean_terminated_length": 860.0, + "completions/min_length": 563.0, + "completions/min_terminated_length": 563.0, + "epoch": 0.8034508627156789, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8815234316593368, + "kl": 0.0159454345703125, + "learning_rate": 2.0207522041029402e-07, + "loss": -0.0539, + "num_tokens": 145512124.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0187525749206543, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026955658285375238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1688478647969476, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3213 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1156.125, + "completions/mean_terminated_length": 1133.2000732421875, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.8037009252313079, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.386562246591703, + "kl": 0.013519287109375, + "learning_rate": 2.0182623266843456e-07, + "loss": -0.0125, + "num_tokens": 145556422.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7055084705352783, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.025934551068169838, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06288684081160914, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3214 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1243.0, + "completions/mean_length": 1221.0625, + "completions/mean_terminated_length": 1053.7000732421875, + "completions/min_length": 878.0, + "completions/min_terminated_length": 878.0, + "epoch": 0.8039509877469367, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7612040090200844, + "kl": 0.014129638671875, + "learning_rate": 2.015775102234116e-07, + "loss": 0.0199, + "num_tokens": 145596127.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8153742551803589, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016353823969529776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023922651323037905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3215 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1375.0, + "completions/max_terminated_length": 1375.0, + "completions/mean_length": 1151.6875, + "completions/mean_terminated_length": 1151.6875, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.8042010502625656, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.129455483248747, + "kl": 0.0150604248046875, + "learning_rate": 2.0132905326474324e-07, + "loss": 0.0064, + "num_tokens": 145639738.0, + "reward": 0.0, + "reward_std": 0.8351696729660034, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08520889592777506, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17600311664699841, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1172998689652263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3216 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1284.9375, + "completions/mean_terminated_length": 1069.875, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.8044511127781946, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.140150107686458, + "kl": 0.0171661376953125, + "learning_rate": 2.010808619817454e-07, + "loss": -0.0384, + "num_tokens": 145695873.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.018269658088684, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00030253805662175913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09774817780672931, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1413558682244267, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3217 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1312.875, + "completions/mean_terminated_length": 1072.2857666015625, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.8047011752938235, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8399682822192625, + "kl": 0.020843505859375, + "learning_rate": 2.0083293656353178e-07, + "loss": -0.0461, + "num_tokens": 145738935.0, + "reward": 0.0, + "reward_std": 0.8935556411743164, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056599536545909934, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07395664842647483, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3218 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1185.0625, + "completions/mean_terminated_length": 1185.0625, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.8049512378094523, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1271108865234987, + "kl": 0.0155487060546875, + "learning_rate": 2.0058527719901321e-07, + "loss": 0.0107, + "num_tokens": 145786360.0, + "reward": -5.587935447692871e-09, + "reward_std": 0.9988998770713806, + "rewards/wordcountpos_reward_GEOBench/mean": -5.587935447692871e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20142676029821563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1870120502714994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3219 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1228.0, + "completions/mean_terminated_length": 1165.2308349609375, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.8052013003250813, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.097290769667702, + "kl": 0.0164794921875, + "learning_rate": 2.003378840768979e-07, + "loss": -0.1005, + "num_tokens": 145828088.0, + "reward": 0.0, + "reward_std": 0.7562370300292969, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008309918610394118, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04447321937744504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3220 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 1006.0, + "completions/mean_terminated_length": 1006.0, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.8054513628407102, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8674246060464754, + "kl": 0.022247314453125, + "learning_rate": 2.0009075738569114e-07, + "loss": -0.0073, + "num_tokens": 145864704.0, + "reward": 0.0, + "reward_std": 0.7859272360801697, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01888432864413204, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07625263377796516, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3221 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1293.9375, + "completions/mean_terminated_length": 1246.3846435546875, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.805701425356339, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.953632383956531, + "kl": 0.01568603515625, + "learning_rate": 1.9984389731369534e-07, + "loss": 0.0142, + "num_tokens": 145919415.0, + "reward": 0.0, + "reward_std": 0.5367890000343323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1633516575900111, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07804635229241287, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3222 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1107.9375, + "completions/mean_terminated_length": 1107.9375, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.805951487871968, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.816837921615872, + "kl": 0.026824951171875, + "learning_rate": 1.9959730404900954e-07, + "loss": 0.0024, + "num_tokens": 145970142.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.012406587600708, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03728286383004419, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10880367277935554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036347, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3223 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1228.0625, + "completions/mean_terminated_length": 1189.21435546875, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.8062015503875969, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.958715861113511, + "kl": 0.0171356201171875, + "learning_rate": 1.9935097777952958e-07, + "loss": -0.0599, + "num_tokens": 146018423.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0508325099945068, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020995831760003997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04182260429634072, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3224 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1172.0, + "completions/max_terminated_length": 1172.0, + "completions/mean_length": 987.0, + "completions/mean_terminated_length": 987.0, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.8064516129032258, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8876491362341863, + "kl": 0.026519775390625, + "learning_rate": 1.991049186929481e-07, + "loss": -0.0427, + "num_tokens": 146066775.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.012366771697998, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029040547159825562, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05403878751331373, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3225 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1073.875, + "completions/mean_terminated_length": 931.8333740234375, + "completions/min_length": 718.0, + "completions/min_terminated_length": 718.0, + "epoch": 0.8067016754188547, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.578428585683651, + "kl": 0.019744873046875, + "learning_rate": 1.9885912697675398e-07, + "loss": 0.013, + "num_tokens": 146109901.0, + "reward": 0.0, + "reward_std": 1.0203512907028198, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047222376896318086, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09012234161555281, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14907119849998599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3226 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 1174.5, + "completions/mean_terminated_length": 1152.800048828125, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.8069517379344836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2646843049358267, + "kl": 0.0162811279296875, + "learning_rate": 1.9861360281823207e-07, + "loss": -0.0023, + "num_tokens": 146162821.0, + "reward": 0.0, + "reward_std": 0.9612037539482117, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03390386411954263, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08848531983499991, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3227 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1231.0, + "completions/max_terminated_length": 1231.0, + "completions/mean_length": 993.75, + "completions/mean_terminated_length": 993.75, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.8072018004501126, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5155119038213307, + "kl": 0.019378662109375, + "learning_rate": 1.9836834640446358e-07, + "loss": -0.0117, + "num_tokens": 146197177.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8122259378433228, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06021811352921485, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11972850968843879, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3228 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1118.6875, + "completions/mean_terminated_length": 1093.2667236328125, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.8074518629657415, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.6373821044745098, + "kl": 0.00730133056640625, + "learning_rate": 1.9812335792232605e-07, + "loss": 0.0042, + "num_tokens": 146227628.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9053102731704712, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026828869263904688, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12857632884344036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3229 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1350.8125, + "completions/mean_terminated_length": 1301.0833740234375, + "completions/min_length": 850.0, + "completions/min_terminated_length": 850.0, + "epoch": 0.8077019254813703, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3793140101546904, + "kl": 0.0150299072265625, + "learning_rate": 1.9787863755849245e-07, + "loss": -0.0043, + "num_tokens": 146276289.0, + "reward": 0.0, + "reward_std": 0.9378127455711365, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01285416540671331, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06170616790929077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3230 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1262.375, + "completions/mean_terminated_length": 1119.800048828125, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.8079519879969993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0780207328604026, + "kl": 0.02325439453125, + "learning_rate": 1.976341854994315e-07, + "loss": -0.0072, + "num_tokens": 146330631.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8828178644180298, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026592063288651407, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08191506209838145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3231 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1188.75, + "completions/mean_terminated_length": 1188.75, + "completions/min_length": 866.0, + "completions/min_terminated_length": 866.0, + "epoch": 0.8082020505126282, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.523745038544158, + "kl": 0.023529052734375, + "learning_rate": 1.9739000193140754e-07, + "loss": -0.037, + "num_tokens": 146367947.0, + "reward": 0.0, + "reward_std": 0.7252894639968872, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05975160160679702, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05889253583874246, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3232 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1021.0, + "completions/max_terminated_length": 1021.0, + "completions/mean_length": 811.75, + "completions/mean_terminated_length": 811.75, + "completions/min_length": 580.0, + "completions/min_terminated_length": 580.0, + "epoch": 0.808452113028257, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.844513419759337, + "kl": 0.013698577880859375, + "learning_rate": 1.9714608704048035e-07, + "loss": -0.0072, + "num_tokens": 146400095.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.6178338527679443, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09372720544663458, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10015624360577484, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3233 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1433.4375, + "completions/mean_terminated_length": 1381.6666259765625, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.808702175543886, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.541100226923047, + "kl": 0.0153045654296875, + "learning_rate": 1.9690244101250502e-07, + "loss": 0.0092, + "num_tokens": 146453062.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9293874502182007, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0174368417682068, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04591614698805135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3234 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1396.0, + "completions/mean_length": 1157.875, + "completions/mean_terminated_length": 1135.0667724609375, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.8089522380595149, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9165676843794133, + "kl": 0.020172119140625, + "learning_rate": 1.9665906403313154e-07, + "loss": -0.0089, + "num_tokens": 146485068.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5977561473846436, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3987378410328666, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.420787466417693, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3235 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1110.375, + "completions/mean_terminated_length": 1084.4000244140625, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.8092023005751438, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4113786833628947, + "kl": 0.0129547119140625, + "learning_rate": 1.9641595628780533e-07, + "loss": -0.0467, + "num_tokens": 146532058.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.906227707862854, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1777895470894278, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09419820096196839, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3236 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1121.0, + "completions/max_terminated_length": 1121.0, + "completions/mean_length": 760.6875, + "completions/mean_terminated_length": 760.6875, + "completions/min_length": 420.0, + "completions/min_terminated_length": 420.0, + "epoch": 0.8094523630907727, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.899970345932244, + "kl": 0.01116943359375, + "learning_rate": 1.9617311796176636e-07, + "loss": -0.0713, + "num_tokens": 146557085.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6165703535079956, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05018452433675187, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11723994507719117, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3237 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1290.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 976.1875, + "completions/mean_terminated_length": 976.1875, + "completions/min_length": 852.0, + "completions/min_terminated_length": 852.0, + "epoch": 0.8097024256064016, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5254133390954236, + "kl": 0.01371002197265625, + "learning_rate": 1.9593054924004926e-07, + "loss": -0.034, + "num_tokens": 146588880.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0520082712173462, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010950447432313888, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028773311725108153, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3238 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1268.0625, + "completions/mean_terminated_length": 1190.75, + "completions/min_length": 946.0, + "completions/min_terminated_length": 946.0, + "epoch": 0.8099524881220305, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.953374889702204, + "kl": 0.022796630859375, + "learning_rate": 1.9568825030748331e-07, + "loss": -0.0045, + "num_tokens": 146641353.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.44045233726501465, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06391340769801859, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10707141296392671, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3239 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 976.0, + "completions/mean_terminated_length": 941.0667114257812, + "completions/min_length": 616.0, + "completions/min_terminated_length": 616.0, + "epoch": 0.8102025506376594, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9551493920173857, + "kl": 0.01568603515625, + "learning_rate": 1.9544622134869239e-07, + "loss": -0.0716, + "num_tokens": 146672281.0, + "reward": 0.0, + "reward_std": 0.6523824334144592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027733921956849678, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060752801319890706, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3240 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1179.375, + "completions/mean_terminated_length": 1158.0, + "completions/min_length": 820.0, + "completions/min_terminated_length": 820.0, + "epoch": 0.8104526131532883, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3828272993172295, + "kl": 0.02252197265625, + "learning_rate": 1.9520446254809436e-07, + "loss": -0.0123, + "num_tokens": 146708999.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8354820013046265, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020113229507930513, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04621823372093964, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3241 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 983.3125, + "completions/mean_terminated_length": 983.3125, + "completions/min_length": 579.0, + "completions/min_terminated_length": 579.0, + "epoch": 0.8107026756689172, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8485778785126485, + "kl": 0.00963592529296875, + "learning_rate": 1.949629740899014e-07, + "loss": -0.0116, + "num_tokens": 146746620.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9728284478187561, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1072505190650651, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10395915136835146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3242 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1418.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1099.0625, + "completions/mean_terminated_length": 1099.0625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.8109527381845462, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.71779976182762, + "kl": 0.02496337890625, + "learning_rate": 1.9472175615811993e-07, + "loss": -0.0049, + "num_tokens": 146800181.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8076139688491821, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04487977009419838, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20777153863679915, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3243 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1297.0, + "completions/max_terminated_length": 1297.0, + "completions/mean_length": 1115.0, + "completions/mean_terminated_length": 1115.0, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.811202800700175, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0641727350556938, + "kl": 0.025787353515625, + "learning_rate": 1.9448080893654982e-07, + "loss": -0.0438, + "num_tokens": 146851869.0, + "reward": 0.0, + "reward_std": 0.9514057636260986, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2787881625472985, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3855543336974089, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3244 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1205.8125, + "completions/mean_terminated_length": 1205.8125, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.8114528632158039, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.808589852706854, + "kl": 0.018524169921875, + "learning_rate": 1.9424013260878463e-07, + "loss": -0.0276, + "num_tokens": 146899426.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0277570486068726, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05656358412555617, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.095975436499382, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402213, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3245 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 1149.0625, + "completions/mean_terminated_length": 1125.666748046875, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.8117029257314329, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4789359575984324, + "kl": 0.0252685546875, + "learning_rate": 1.9399972735821212e-07, + "loss": -0.0023, + "num_tokens": 146951611.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0340464115142822, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0365718059085005, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09065783489928489, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3246 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1094.1875, + "completions/mean_terminated_length": 850.7000122070312, + "completions/min_length": 487.0, + "completions/min_terminated_length": 487.0, + "epoch": 0.8119529882470617, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6149787686332857, + "kl": 0.02166748046875, + "learning_rate": 1.9375959336801283e-07, + "loss": 0.022, + "num_tokens": 146986470.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0289937257766724, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008859984513835094, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.035439938055340375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3247 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1144.875, + "completions/mean_terminated_length": 1144.875, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.8122030507626907, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.37143165172092, + "kl": 0.0184478759765625, + "learning_rate": 1.93519730821161e-07, + "loss": 0.0062, + "num_tokens": 147030892.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0149891376495361, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09725811608170704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048434998178978506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3248 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1211.6875, + "completions/mean_terminated_length": 1170.5, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.8124531132783196, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.461189273584076, + "kl": 0.0158538818359375, + "learning_rate": 1.932801399004239e-07, + "loss": -0.0126, + "num_tokens": 147066671.0, + "reward": 0.0, + "reward_std": 0.7449402213096619, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.049063457964681506, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12282806794874657, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3249 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1447.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1187.125, + "completions/mean_terminated_length": 1187.125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.8127031757939485, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2743403032841165, + "kl": 0.01898193359375, + "learning_rate": 1.9304082078836175e-07, + "loss": -0.0447, + "num_tokens": 147116249.0, + "reward": 0.0, + "reward_std": 1.0200839042663574, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1360975801724132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1555199063560707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15587269259333494, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3250 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1013.6875, + "completions/mean_terminated_length": 1013.6875, + "completions/min_length": 502.0, + "completions/min_terminated_length": 502.0, + "epoch": 0.8129532383095774, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.680059265927026, + "kl": 0.014984130859375, + "learning_rate": 1.9280177366732792e-07, + "loss": -0.1041, + "num_tokens": 147156004.0, + "reward": 0.0, + "reward_std": 0.8130615949630737, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.038573059775539695, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08309908020352066, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3251 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1432.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 874.4375, + "completions/mean_terminated_length": 874.4375, + "completions/min_length": 515.0, + "completions/min_terminated_length": 515.0, + "epoch": 0.8132033008252063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.412878801522868, + "kl": 0.0159454345703125, + "learning_rate": 1.9256299871946809e-07, + "loss": 0.0127, + "num_tokens": 147194035.0, + "reward": 0.0, + "reward_std": 0.916688084602356, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012898899956900325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.036246503715134056, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3252 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1057.0, + "completions/mean_length": 1244.25, + "completions/mean_terminated_length": 988.5, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.8134533633408352, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.134311447534594, + "kl": 0.013427734375, + "learning_rate": 1.9232449612672113e-07, + "loss": -0.0069, + "num_tokens": 147242423.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9510148763656616, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04649320676835371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17275461633273648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3253 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 1484.0, + "completions/mean_terminated_length": 1244.0, + "completions/min_length": 1244.0, + "completions/min_terminated_length": 1244.0, + "epoch": 0.8137034258564642, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.303351813076607, + "kl": 0.0123291015625, + "learning_rate": 1.9208626607081802e-07, + "loss": 0.0119, + "num_tokens": 147300327.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.48237964510917664, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05973641426636668, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06169550336420941, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3254 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1233.0, + "completions/mean_length": 999.6875, + "completions/mean_terminated_length": 966.3333740234375, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.813953488372093, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.510739415407291, + "kl": 0.0193023681640625, + "learning_rate": 1.9184830873328208e-07, + "loss": -0.0139, + "num_tokens": 147335066.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.55860435962677, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09144373763217571, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09885378598444206, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454343, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3255 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1048.4375, + "completions/mean_terminated_length": 1018.3333740234375, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.8142035508877219, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2327260786625214, + "kl": 0.015411376953125, + "learning_rate": 1.9161062429542897e-07, + "loss": 0.0511, + "num_tokens": 147369257.0, + "reward": 0.0, + "reward_std": 0.8906115293502808, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06648934173826611, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.050719806816621575, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3256 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1285.0625, + "completions/mean_terminated_length": 1235.4615478515625, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.8144536134033509, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2335743461266153, + "kl": 0.0196533203125, + "learning_rate": 1.913732129383662e-07, + "loss": 0.0409, + "num_tokens": 147425778.0, + "reward": 0.0, + "reward_std": 0.715915858745575, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008773297299440274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05630667924477909, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3257 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1180.5625, + "completions/mean_terminated_length": 1159.2667236328125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.8147036759189797, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1786642874916216, + "kl": 0.01776123046875, + "learning_rate": 1.9113607484299342e-07, + "loss": -0.0411, + "num_tokens": 147476667.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9631321430206299, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020483108269192626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0892080659556726, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3258 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1232.125, + "completions/mean_terminated_length": 1170.3077392578125, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.8149537384346086, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1235489836303483, + "kl": 0.0142059326171875, + "learning_rate": 1.908992101900019e-07, + "loss": 0.0249, + "num_tokens": 147515413.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9671287536621094, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1367628186603719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08530712459103458, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042256, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3259 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1097.25, + "completions/mean_terminated_length": 1004.3077392578125, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.8152038009502376, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8069640132718208, + "kl": 0.018096923828125, + "learning_rate": 1.906626191598748e-07, + "loss": -0.0084, + "num_tokens": 147560505.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0260474681854248, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0286935748563192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11598889276326656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3260 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1216.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 967.6875, + "completions/mean_terminated_length": 967.6875, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.8154538634658665, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.599886225099399, + "kl": 0.022216796875, + "learning_rate": 1.9042630193288656e-07, + "loss": -0.0013, + "num_tokens": 147603500.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7457013130187988, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03953035642180973, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0877345949262724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3261 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1211.4375, + "completions/mean_terminated_length": 1144.84619140625, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.8157039259814953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1846266079456984, + "kl": 0.02044677734375, + "learning_rate": 1.9019025868910296e-07, + "loss": -0.0595, + "num_tokens": 147648755.0, + "reward": 0.0, + "reward_std": 0.5269179344177246, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017922195775846292, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.033546992464805844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3262 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1381.125, + "completions/mean_terminated_length": 1262.25, + "completions/min_length": 1108.0, + "completions/min_terminated_length": 1108.0, + "epoch": 0.8159539884971243, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1303753537492307, + "kl": 0.0157012939453125, + "learning_rate": 1.8995448960838095e-07, + "loss": 0.0135, + "num_tokens": 147709093.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0519099235534668, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1290289965639852, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2574687451374203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3263 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1202.375, + "completions/mean_terminated_length": 1103.166748046875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.8162040510127532, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.346834835254651, + "kl": 0.01678466796875, + "learning_rate": 1.8971899487036914e-07, + "loss": 0.0106, + "num_tokens": 147755699.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.973486065864563, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006057913493773513, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07446800745700644, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3264 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1140.375, + "completions/mean_terminated_length": 976.9091186523438, + "completions/min_length": 585.0, + "completions/min_terminated_length": 585.0, + "epoch": 0.816454113528382, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.034833343078814, + "kl": 0.010162353515625, + "learning_rate": 1.8948377465450644e-07, + "loss": -0.0477, + "num_tokens": 147803233.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9749085903167725, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1456298678545449, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17981130341756993, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3265 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1062.8125, + "completions/mean_terminated_length": 1062.8125, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.816704176044011, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.876750537716946, + "kl": 0.0185394287109375, + "learning_rate": 1.8924882914002277e-07, + "loss": -0.0001, + "num_tokens": 147848078.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7167302370071411, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030300799855037842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09067719595874835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3266 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1294.375, + "completions/mean_terminated_length": 1200.9091796875, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.8169542385596399, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9670546372566466, + "kl": 0.0206298828125, + "learning_rate": 1.8901415850593915e-07, + "loss": -0.041, + "num_tokens": 147904956.0, + "reward": -3.725290298461914e-09, + "reward_std": 0.9995211362838745, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0211539146814556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07702471766738442, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039007, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3267 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1319.375, + "completions/mean_terminated_length": 1237.272705078125, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.8172043010752689, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4990128654832278, + "kl": 0.0155487060546875, + "learning_rate": 1.8877976293106646e-07, + "loss": -0.0143, + "num_tokens": 147959218.0, + "reward": 0.0, + "reward_std": 0.49567848443984985, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05049581056888722, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0661158297132924, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11729986896522632, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3268 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1342.75, + "completions/mean_terminated_length": 1220.4444580078125, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.8174543635908977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.495552040178613, + "kl": 0.0131988525390625, + "learning_rate": 1.8854564259400642e-07, + "loss": 0.0236, + "num_tokens": 148001974.0, + "reward": 0.0, + "reward_std": 0.8942161798477173, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0283564323693785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0500826818015788, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3269 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1381.875, + "completions/mean_terminated_length": 1230.0, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.8177044261065266, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0517754212774717, + "kl": 0.019256591796875, + "learning_rate": 1.883117976731508e-07, + "loss": 0.0293, + "num_tokens": 148059260.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5483676195144653, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018919371574703067, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1018544958925596, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387146, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3270 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1097.9375, + "completions/mean_terminated_length": 1071.1334228515625, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.8179544886221556, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0088094289320555, + "kl": 0.01849365234375, + "learning_rate": 1.8807822834668193e-07, + "loss": -0.0513, + "num_tokens": 148096539.0, + "reward": 0.0, + "reward_std": 0.6048824191093445, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.15194236783882537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13069867421729095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3271 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1003.6875, + "completions/mean_terminated_length": 970.6000366210938, + "completions/min_length": 542.0, + "completions/min_terminated_length": 542.0, + "epoch": 0.8182045511377845, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.209142196874678, + "kl": 0.021026611328125, + "learning_rate": 1.8784493479257185e-07, + "loss": -0.0487, + "num_tokens": 148132230.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5184057354927063, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1221746390760832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1524675365961799, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05821416398857662, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3272 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1117.0, + "completions/mean_length": 1193.375, + "completions/mean_terminated_length": 886.75, + "completions/min_length": 798.0, + "completions/min_terminated_length": 798.0, + "epoch": 0.8184546136534133, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8116280790384414, + "kl": 0.0159149169921875, + "learning_rate": 1.8761191718858235e-07, + "loss": -0.0313, + "num_tokens": 148177836.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6271463632583618, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06346318782955197, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10292204727008213, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15869840952317446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3273 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1031.1875, + "completions/mean_terminated_length": 923.0000610351562, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.8187046761690423, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.327984082333208, + "kl": 0.0152740478515625, + "learning_rate": 1.8737917571226513e-07, + "loss": 0.0048, + "num_tokens": 148224439.0, + "reward": 0.0, + "reward_std": 0.9217996597290039, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11031299356521339, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11238449632989167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3274 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1177.375, + "completions/mean_terminated_length": 1177.375, + "completions/min_length": 993.0, + "completions/min_terminated_length": 993.0, + "epoch": 0.8189547386846712, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7998562050785107, + "kl": 0.0117340087890625, + "learning_rate": 1.8714671054096154e-07, + "loss": -0.004, + "num_tokens": 148265725.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8760861754417419, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016860676385433284, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06037762938002553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3275 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1339.0625, + "completions/mean_terminated_length": 1178.125, + "completions/min_length": 1058.0, + "completions/min_terminated_length": 1058.0, + "epoch": 0.8192048012003, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8345416004522184, + "kl": 0.0186614990234375, + "learning_rate": 1.8691452185180223e-07, + "loss": 0.0155, + "num_tokens": 148315710.0, + "reward": 0.0, + "reward_std": 0.7532371282577515, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03020699862665563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0739328767105604, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026001, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3276 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1384.375, + "completions/mean_terminated_length": 1294.4444580078125, + "completions/min_length": 1049.0, + "completions/min_terminated_length": 1049.0, + "epoch": 0.819454863715929, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8836816588895946, + "kl": 0.0203857421875, + "learning_rate": 1.866826098217071e-07, + "loss": -0.0047, + "num_tokens": 148356804.0, + "reward": 0.0, + "reward_std": 0.907525360584259, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05742081145705741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06725962423194967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3277 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1096.0625, + "completions/mean_terminated_length": 1096.0625, + "completions/min_length": 907.0, + "completions/min_terminated_length": 907.0, + "epoch": 0.8197049262315579, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.285010077465129, + "kl": 0.02496337890625, + "learning_rate": 1.8645097462738578e-07, + "loss": -0.0214, + "num_tokens": 148397229.0, + "reward": 0.0, + "reward_std": 1.0429174900054932, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08601589721963625, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11304736350844449, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3278 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1210.1875, + "completions/mean_terminated_length": 984.7777709960938, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.8199549887471868, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2919073015001272, + "kl": 0.00972747802734375, + "learning_rate": 1.862196164453365e-07, + "loss": 0.0133, + "num_tokens": 148448944.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.723388135433197, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015472831871702725, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19331806741762564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3279 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1204.625, + "completions/mean_terminated_length": 1162.4285888671875, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.8202050512628157, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9349860754413055, + "kl": 0.0185089111328125, + "learning_rate": 1.8598853545184617e-07, + "loss": 0.0574, + "num_tokens": 148485586.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9415729641914368, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08846066927073572, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12634781625397823, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3280 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1275.75, + "completions/mean_terminated_length": 1201.0, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.8204551137784446, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7325548761435683, + "kl": 0.0196990966796875, + "learning_rate": 1.857577318229912e-07, + "loss": 0.0053, + "num_tokens": 148535854.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0304043292999268, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02913893789767058, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11446253577567869, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3281 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1267.5, + "completions/mean_terminated_length": 1234.2857666015625, + "completions/min_length": 1010.0, + "completions/min_terminated_length": 1010.0, + "epoch": 0.8207051762940735, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.640643893774039, + "kl": 0.021697998046875, + "learning_rate": 1.8552720573463614e-07, + "loss": -0.0137, + "num_tokens": 148573030.0, + "reward": 0.0, + "reward_std": 0.8453119397163391, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046400563990693894, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07167799342770591, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3282 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1288.9375, + "completions/mean_terminated_length": 1162.300048828125, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.8209552388097024, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6763414212092824, + "kl": 0.021820068359375, + "learning_rate": 1.8529695736243417e-07, + "loss": -0.0423, + "num_tokens": 148633437.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0342957973480225, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05028717250419313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0787297582614017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3283 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1266.875, + "completions/mean_terminated_length": 1233.571533203125, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.8212053013253313, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.555051629363068, + "kl": 0.0146484375, + "learning_rate": 1.8506698688182692e-07, + "loss": -0.0292, + "num_tokens": 148685883.0, + "reward": 0.0, + "reward_std": 0.8947402238845825, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10733739721094965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.31374874359249727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3284 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1227.0, + "completions/mean_terminated_length": 1208.800048828125, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.8214553638409603, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.286094162452857, + "kl": 0.009368896484375, + "learning_rate": 1.8483729446804432e-07, + "loss": 0.0319, + "num_tokens": 148723907.0, + "reward": 0.0, + "reward_std": 0.8295310735702515, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02785628545332943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10996300794457746, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3285 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1444.4375, + "completions/mean_terminated_length": 1277.75, + "completions/min_length": 1127.0, + "completions/min_terminated_length": 1127.0, + "epoch": 0.8217054263565892, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4804587407528995, + "kl": 0.0130615234375, + "learning_rate": 1.846078802961043e-07, + "loss": -0.0196, + "num_tokens": 148780354.0, + "reward": 0.0, + "reward_std": 0.9122209548950195, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.058092844493283544, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18131174175666206, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3286 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 1043.4375, + "completions/mean_terminated_length": 1013.0000610351562, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.821955488872218, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.324735914987629, + "kl": 0.019317626953125, + "learning_rate": 1.8437874454081268e-07, + "loss": -0.0173, + "num_tokens": 148830481.0, + "reward": 0.0, + "reward_std": 0.3974177837371826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08118209695197698, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17639970509019526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3287 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1221.0, + "completions/mean_terminated_length": 1156.615478515625, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.822205551387847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4909493557601676, + "kl": 0.0201263427734375, + "learning_rate": 1.8414988737676354e-07, + "loss": -0.0453, + "num_tokens": 148887209.0, + "reward": 0.0, + "reward_std": 0.9631990194320679, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1426044746150302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14910395971618765, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0739118594202782, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3288 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1166.5, + "completions/mean_terminated_length": 1166.5, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.8224556139034759, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7079432776441097, + "kl": 0.0153961181640625, + "learning_rate": 1.8392130897833836e-07, + "loss": 0.0057, + "num_tokens": 148923481.0, + "reward": 0.0, + "reward_std": 1.019322156906128, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12462433930804641, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11165473424455719, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3289 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1317.25, + "completions/mean_terminated_length": 1175.111083984375, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.8227056764191047, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.167172849112222, + "kl": 0.019439697265625, + "learning_rate": 1.8369300951970623e-07, + "loss": 0.0213, + "num_tokens": 148979517.0, + "reward": 0.0, + "reward_std": 0.6300787925720215, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14559567657015932, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08617610979583633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09583937179043478, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3290 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1109.0625, + "completions/mean_terminated_length": 1083.0, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.8229557389347337, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.531681259249253, + "kl": 0.018218994140625, + "learning_rate": 1.8346498917482378e-07, + "loss": -0.0685, + "num_tokens": 149012318.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0196782350540161, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05441964972707712, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07087838677662844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3291 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1293.0, + "completions/mean_length": 1006.25, + "completions/mean_terminated_length": 973.3333740234375, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.8232058014503626, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4119275449459296, + "kl": 0.0177459716796875, + "learning_rate": 1.8323724811743494e-07, + "loss": -0.0447, + "num_tokens": 149051882.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0013210773468018, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0049078007812753495, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07004046066092225, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185827, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3292 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 941.125, + "completions/mean_terminated_length": 941.125, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.8234558639659915, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8218673476935336, + "kl": 0.02471923828125, + "learning_rate": 1.8300978652107078e-07, + "loss": 0.0098, + "num_tokens": 149092844.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8059436678886414, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05123801579142713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13604254150847203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3293 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1142.6875, + "completions/mean_terminated_length": 1118.86669921875, + "completions/min_length": 706.0, + "completions/min_terminated_length": 706.0, + "epoch": 0.8237059264816204, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0536257812545213, + "kl": 0.020904541015625, + "learning_rate": 1.8278260455904942e-07, + "loss": 0.0175, + "num_tokens": 149125679.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0639952421188354, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06613547067642667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0531253297984531, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3294 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1215.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1070.9375, + "completions/mean_terminated_length": 1070.9375, + "completions/min_length": 965.0, + "completions/min_terminated_length": 965.0, + "epoch": 0.8239559889972493, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.702290251950166, + "kl": 0.027435302734375, + "learning_rate": 1.8255570240447614e-07, + "loss": -0.0111, + "num_tokens": 149169094.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9519600868225098, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07043552849104721, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11504741942969329, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3295 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1299.5625, + "completions/mean_terminated_length": 1208.45458984375, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.8242060515128782, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1211359493489637, + "kl": 0.0094146728515625, + "learning_rate": 1.8232908023024274e-07, + "loss": -0.0098, + "num_tokens": 149221031.0, + "reward": 0.0, + "reward_std": 1.0418121814727783, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011028507229069368, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0437771680507723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3296 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1311.6875, + "completions/mean_terminated_length": 1248.916748046875, + "completions/min_length": 839.0, + "completions/min_terminated_length": 839.0, + "epoch": 0.8244561140285072, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9206444930117397, + "kl": 0.018707275390625, + "learning_rate": 1.8210273820902788e-07, + "loss": -0.0176, + "num_tokens": 149272458.0, + "reward": 0.0, + "reward_std": 1.0221939086914062, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1510065197361149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.159546799767265, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3297 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1142.9375, + "completions/mean_terminated_length": 1119.1334228515625, + "completions/min_length": 687.0, + "completions/min_terminated_length": 687.0, + "epoch": 0.824706176544136, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.326282968860227, + "kl": 0.0148773193359375, + "learning_rate": 1.818766765132964e-07, + "loss": 0.031, + "num_tokens": 149310649.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6133407354354858, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023911748792009333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13013412663166035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578045, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3298 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 1164.6875, + "completions/mean_terminated_length": 1087.3077392578125, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.8249562390597649, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.170211410991944, + "kl": 0.017791748046875, + "learning_rate": 1.8165089531530003e-07, + "loss": -0.0264, + "num_tokens": 149368332.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9572227001190186, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08717508638515696, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15073104312389235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3299 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1212.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 1035.0625, + "completions/mean_terminated_length": 1035.0625, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.8252063015753939, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.800330732939194, + "kl": 0.00847625732421875, + "learning_rate": 1.8142539478707643e-07, + "loss": 0.0168, + "num_tokens": 149395869.0, + "reward": 0.0, + "reward_std": 0.8105767965316772, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0006715031030157135, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05036733251004605, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3300 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1281.5625, + "completions/mean_terminated_length": 1231.1539306640625, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.8254563640910227, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8522698638309945, + "kl": 0.016845703125, + "learning_rate": 1.8120017510044949e-07, + "loss": 0.0051, + "num_tokens": 149446974.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9291296005249023, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04953907703042173, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11388624511790661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298359, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3301 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1175.0, + "completions/mean_terminated_length": 1153.3333740234375, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.8257064266066516, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.08364059391894, + "kl": 0.0196533203125, + "learning_rate": 1.809752364270294e-07, + "loss": -0.0133, + "num_tokens": 149486278.0, + "reward": 0.0, + "reward_std": 0.9932615756988525, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.004102116465847751, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09534541264593022, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14497764834110988, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3302 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1164.5, + "completions/mean_terminated_length": 1142.1334228515625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.8259564891222806, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.870185538375135, + "kl": 0.0167694091796875, + "learning_rate": 1.807505789382116e-07, + "loss": -0.0402, + "num_tokens": 149537286.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0475858449935913, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06350016107161828, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0780603832114344, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1018350154434631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3303 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1307.0, + "completions/mean_length": 978.375, + "completions/mean_terminated_length": 943.6000366210938, + "completions/min_length": 635.0, + "completions/min_terminated_length": 635.0, + "epoch": 0.8262065516379095, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6767688504244407, + "kl": 0.0134735107421875, + "learning_rate": 1.8052620280517777e-07, + "loss": -0.068, + "num_tokens": 149570564.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0108498334884644, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04731193038556004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13486178923073233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3304 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1403.25, + "completions/mean_terminated_length": 1359.2728271484375, + "completions/min_length": 1254.0, + "completions/min_terminated_length": 1254.0, + "epoch": 0.8264566141535384, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4564570917980766, + "kl": 0.016357421875, + "learning_rate": 1.8030210819889492e-07, + "loss": 0.0125, + "num_tokens": 149626648.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9693732261657715, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.042579371121952786, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0575815387471794, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08432740427115677, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3305 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1264.0, + "completions/mean_length": 1369.375, + "completions/mean_terminated_length": 1201.4285888671875, + "completions/min_length": 1079.0, + "completions/min_terminated_length": 1079.0, + "epoch": 0.8267066766691673, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.219801729823884, + "kl": 0.029510498046875, + "learning_rate": 1.8007829529011592e-07, + "loss": 0.0153, + "num_tokens": 149688774.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7847974300384521, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.17132821012437244, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21081221132846611, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15299479536052008, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3306 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1320.125, + "completions/mean_terminated_length": 1140.25, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.8269567391847962, + "frac_reward_zero_std": 0.0, + "grad_norm": 262.3400112664821, + "kl": 1.679595947265625, + "learning_rate": 1.7985476424937868e-07, + "loss": 0.0723, + "num_tokens": 149750512.0, + "reward": 0.0, + "reward_std": 1.0604479312896729, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023775739099007286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043521754792411056, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3307 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1400.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 973.9375, + "completions/mean_terminated_length": 973.9375, + "completions/min_length": 605.0, + "completions/min_terminated_length": 605.0, + "epoch": 0.8272068017004252, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.199771527565874, + "kl": 0.023345947265625, + "learning_rate": 1.7963151524700645e-07, + "loss": -0.0031, + "num_tokens": 149787415.0, + "reward": -3.725290298461914e-08, + "reward_std": 1.0467500686645508, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08563174801466132, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16867419577750553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921948, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3308 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1401.4375, + "completions/mean_terminated_length": 1274.71435546875, + "completions/min_length": 823.0, + "completions/min_terminated_length": 823.0, + "epoch": 0.827456864216054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.08642295307955, + "kl": 0.02392578125, + "learning_rate": 1.794085484531075e-07, + "loss": 0.0025, + "num_tokens": 149835654.0, + "reward": 0.0, + "reward_std": 0.5576320290565491, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03728755463655079, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05130425968909795, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15723301886761007, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3309 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1351.625, + "completions/mean_terminated_length": 1284.181884765625, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.8277069267316829, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.149333473397876, + "kl": 0.010267257690429688, + "learning_rate": 1.7918586403757513e-07, + "loss": 0.017, + "num_tokens": 149882328.0, + "reward": 0.0, + "reward_std": 0.8166376352310181, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005801897083151076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08912570358984709, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3310 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1370.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1155.0, + "completions/mean_terminated_length": 1155.0, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.8279569892473119, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.552301672397351, + "kl": 0.011474609375, + "learning_rate": 1.7896346217008739e-07, + "loss": -0.0454, + "num_tokens": 149918208.0, + "reward": 0.0, + "reward_std": 1.0367990732192993, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12940800006621364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11245057364975543, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3311 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1247.3125, + "completions/mean_terminated_length": 1163.0833740234375, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.8282070517629407, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0642049362411177, + "kl": 0.019195556640625, + "learning_rate": 1.7874134302010713e-07, + "loss": 0.0405, + "num_tokens": 149969565.0, + "reward": 0.0, + "reward_std": 0.5988802909851074, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1865376538297196, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3312 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1262.25, + "completions/mean_terminated_length": 1183.0, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.8284571142785696, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4287451404743434, + "kl": 0.01519775390625, + "learning_rate": 1.7851950675688193e-07, + "loss": 0.0192, + "num_tokens": 150016617.0, + "reward": 0.0, + "reward_std": 1.0478367805480957, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007208831931303741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028835327725214965, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14375905768565217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3313 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1143.5, + "completions/mean_terminated_length": 1092.571533203125, + "completions/min_length": 714.0, + "completions/min_terminated_length": 714.0, + "epoch": 0.8287071767941986, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2763062733479575, + "kl": 0.0179443359375, + "learning_rate": 1.7829795354944345e-07, + "loss": -0.024, + "num_tokens": 150061177.0, + "reward": 0.0, + "reward_std": 0.46911686658859253, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010352025192993023, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06745498005528128, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3314 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1410.3125, + "completions/mean_terminated_length": 1295.0, + "completions/min_length": 1174.0, + "completions/min_terminated_length": 1174.0, + "epoch": 0.8289572393098275, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7898036203208398, + "kl": 0.0161285400390625, + "learning_rate": 1.7807668356660803e-07, + "loss": 0.002, + "num_tokens": 150112974.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0384305715560913, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0007064805311468489, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03234343183117711, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3315 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 1085.875, + "completions/mean_terminated_length": 1058.2667236328125, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.8292073018254563, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.889502462325575, + "kl": 0.00847625732421875, + "learning_rate": 1.7785569697697588e-07, + "loss": 0.025, + "num_tokens": 150158828.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9413480758666992, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006133715146383608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06913128405429025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3316 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1260.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1023.75, + "completions/mean_terminated_length": 1023.75, + "completions/min_length": 748.0, + "completions/min_terminated_length": 748.0, + "epoch": 0.8294573643410853, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3251298320307963, + "kl": 0.0208740234375, + "learning_rate": 1.776349939489315e-07, + "loss": -0.0593, + "num_tokens": 150189960.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0481144189834595, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08449457057204356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06935700065117864, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3317 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1382.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1016.25, + "completions/mean_terminated_length": 1016.25, + "completions/min_length": 588.0, + "completions/min_terminated_length": 588.0, + "epoch": 0.8297074268567142, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6890235131165396, + "kl": 0.022430419921875, + "learning_rate": 1.7741457465064332e-07, + "loss": 0.0061, + "num_tokens": 150238380.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.053436517715454, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11405530939411587, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15657752530770203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7124999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3318 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1420.0625, + "completions/mean_terminated_length": 1317.2857666015625, + "completions/min_length": 1093.0, + "completions/min_terminated_length": 1093.0, + "epoch": 0.829957489372343, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7861203131680234, + "kl": 0.0189208984375, + "learning_rate": 1.7719443925006334e-07, + "loss": 0.0306, + "num_tokens": 150289245.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.880077064037323, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.034936536299512225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06278454126828649, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3319 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1147.875, + "completions/mean_terminated_length": 1147.875, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.830207551887972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2103626903954243, + "kl": 0.0208740234375, + "learning_rate": 1.7697458791492777e-07, + "loss": -0.0182, + "num_tokens": 150328011.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0336495637893677, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05808697044929774, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057715655120197586, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0807373427759331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3320 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1224.375, + "completions/mean_terminated_length": 1160.769287109375, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.8304576144036009, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.890680576317345, + "kl": 0.0136260986328125, + "learning_rate": 1.7675502081275574e-07, + "loss": 0.0061, + "num_tokens": 150364849.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6507837772369385, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052428373837633935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046633687072964655, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15962919996504865, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3321 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1254.5, + "completions/mean_terminated_length": 1238.1334228515625, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.8307076769192298, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0061440480171853, + "kl": 0.0144805908203125, + "learning_rate": 1.7653573811084987e-07, + "loss": -0.0034, + "num_tokens": 150414497.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9625765085220337, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06708118294916875, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10803227646658742, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16095778144410233, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3322 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1290.0, + "completions/mean_terminated_length": 1164.0, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.8309577394348587, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7082229360850234, + "kl": 0.0160369873046875, + "learning_rate": 1.763167399762968e-07, + "loss": -0.0197, + "num_tokens": 150467705.0, + "reward": 0.0, + "reward_std": 0.79433274269104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07289329739256194, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11597680007621719, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3323 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1198.375, + "completions/mean_terminated_length": 963.7777709960938, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.8312078019504876, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9777072668389293, + "kl": 0.02215576171875, + "learning_rate": 1.7609802657596555e-07, + "loss": 0.0352, + "num_tokens": 150514151.0, + "reward": 0.0, + "reward_std": 0.7990173101425171, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09435766816932253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14568241689817835, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3324 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1324.625, + "completions/mean_terminated_length": 1149.25, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.8314578644661166, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.668721449254714, + "kl": 0.0235595703125, + "learning_rate": 1.758795980765085e-07, + "loss": -0.0134, + "num_tokens": 150566753.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9575490355491638, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00680044318299455, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06163229677268722, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18772517343377457, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3325 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1287.375, + "completions/mean_terminated_length": 1190.727294921875, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.8317079269817454, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.932226794960813, + "kl": 0.019805908203125, + "learning_rate": 1.756614546443609e-07, + "loss": -0.0153, + "num_tokens": 150609783.0, + "reward": 0.0, + "reward_std": 0.8407802581787109, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02190270907074023, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05802678287774548, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3326 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1152.125, + "completions/mean_terminated_length": 1128.933349609375, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.8319579894973743, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.374451346556568, + "kl": 0.01983642578125, + "learning_rate": 1.7544359644574075e-07, + "loss": -0.0327, + "num_tokens": 150651105.0, + "reward": 0.0, + "reward_std": 0.7198449969291687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03051991107873687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05486591292560124, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3327 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1352.0, + "completions/mean_length": 1199.625, + "completions/mean_terminated_length": 1019.4000244140625, + "completions/min_length": 845.0, + "completions/min_terminated_length": 845.0, + "epoch": 0.8322080520130033, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.619876343238086, + "kl": 0.0177459716796875, + "learning_rate": 1.752260236466488e-07, + "loss": 0.0416, + "num_tokens": 150696443.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9766565561294556, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06487051786482351, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15526995809143768, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3328 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1154.375, + "completions/mean_terminated_length": 1154.375, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.8324581145286322, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8040864930695086, + "kl": 0.01300048828125, + "learning_rate": 1.7500873641286823e-07, + "loss": -0.043, + "num_tokens": 150741881.0, + "reward": 0.0, + "reward_std": 0.8253225088119507, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002890118924763851, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06933326619103929, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3329 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1123.0, + "completions/mean_length": 914.5625, + "completions/mean_terminated_length": 875.5333862304688, + "completions/min_length": 627.0, + "completions/min_terminated_length": 627.0, + "epoch": 0.832708177044261, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.743160072425349, + "kl": 0.024169921875, + "learning_rate": 1.7479173490996486e-07, + "loss": -0.011, + "num_tokens": 150783338.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.971651017665863, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08025381308733269, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11566276463467542, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3330 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1440.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1031.125, + "completions/mean_terminated_length": 1031.125, + "completions/min_length": 547.0, + "completions/min_terminated_length": 547.0, + "epoch": 0.83295823955989, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.837990025803936, + "kl": 0.020233154296875, + "learning_rate": 1.7457501930328648e-07, + "loss": -0.0493, + "num_tokens": 150829412.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9208694100379944, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08296539657117959, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053844406467793204, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3331 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 937.1875, + "completions/mean_terminated_length": 937.1875, + "completions/min_length": 544.0, + "completions/min_terminated_length": 544.0, + "epoch": 0.8332083020755189, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5864535445714254, + "kl": 0.0143280029296875, + "learning_rate": 1.743585897579633e-07, + "loss": -0.0453, + "num_tokens": 150867071.0, + "reward": 0.0, + "reward_std": 0.6030685901641846, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.32063043449953377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3042259549283157, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3332 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 891.0, + "completions/mean_length": 1152.25, + "completions/mean_terminated_length": 804.5, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.8334583645911477, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7427615615906693, + "kl": 0.0132904052734375, + "learning_rate": 1.741424464389071e-07, + "loss": -0.0292, + "num_tokens": 150912531.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0433030128479004, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022925231026140323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057464239337763505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3333 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1149.1875, + "completions/mean_terminated_length": 1032.25, + "completions/min_length": 651.0, + "completions/min_terminated_length": 651.0, + "epoch": 0.8337084271067767, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.039382649750022, + "kl": 0.0186767578125, + "learning_rate": 1.7392658951081228e-07, + "loss": -0.0127, + "num_tokens": 150967382.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8837684392929077, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10634569108963962, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11248702266337725, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3334 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1317.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1113.1875, + "completions/mean_terminated_length": 1113.1875, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.8339584896224056, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.591326738982829, + "kl": 0.0234375, + "learning_rate": 1.7371101913815462e-07, + "loss": -0.0596, + "num_tokens": 151008713.0, + "reward": 0.0, + "reward_std": 0.8455406427383423, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011690946589991882, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.029045645816767676, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3335 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1383.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1150.5, + "completions/mean_terminated_length": 1150.5, + "completions/min_length": 1064.0, + "completions/min_terminated_length": 1064.0, + "epoch": 0.8342085521380345, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5263208193173887, + "kl": 0.0112457275390625, + "learning_rate": 1.734957354851912e-07, + "loss": -0.0104, + "num_tokens": 151046177.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0180872678756714, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06565148385139245, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07879684032829408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3336 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1072.5625, + "completions/mean_terminated_length": 1072.5625, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.8344586146536634, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.605868105826089, + "kl": 0.018524169921875, + "learning_rate": 1.7328073871596148e-07, + "loss": -0.021, + "num_tokens": 151088146.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.023923635482788, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05035984669593145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09974658403519784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3337 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1257.0, + "completions/max_terminated_length": 1257.0, + "completions/mean_length": 1028.125, + "completions/mean_terminated_length": 1028.125, + "completions/min_length": 781.0, + "completions/min_terminated_length": 781.0, + "epoch": 0.8347086771692923, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2994834875714525, + "kl": 0.0164337158203125, + "learning_rate": 1.730660289942858e-07, + "loss": -0.0006, + "num_tokens": 151126676.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8916612267494202, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13793270018590295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15009179456574678, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3338 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1260.6875, + "completions/mean_terminated_length": 1151.9091796875, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.8349587396849212, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0981326550735884, + "kl": 0.01837158203125, + "learning_rate": 1.7285160648376544e-07, + "loss": -0.0238, + "num_tokens": 151176359.0, + "reward": 0.0, + "reward_std": 0.8486385345458984, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13265735844857918, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10635267025276889, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3339 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1223.5625, + "completions/mean_terminated_length": 1131.416748046875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.8352088022005502, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.266308841371822, + "kl": 0.01377105712890625, + "learning_rate": 1.7263747134778344e-07, + "loss": 0.0058, + "num_tokens": 151228360.0, + "reward": 0.0, + "reward_std": 0.7665517330169678, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022163796507034032, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0667954177915294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3340 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1185.0, + "completions/mean_terminated_length": 1164.0, + "completions/min_length": 534.0, + "completions/min_terminated_length": 534.0, + "epoch": 0.835458864716179, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.275011702307313, + "kl": 0.02374267578125, + "learning_rate": 1.724236237495038e-07, + "loss": -0.0882, + "num_tokens": 151283784.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8023648262023926, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0014590891640388097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0603843768885029, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054751, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3341 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1071.875, + "completions/mean_terminated_length": 1043.3333740234375, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.8357089272318079, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.798271889544508, + "kl": 0.0155029296875, + "learning_rate": 1.7221006385187114e-07, + "loss": 0.0245, + "num_tokens": 151317054.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8726569414138794, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026763884278404253, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0859648400245396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3342 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1196.1875, + "completions/mean_terminated_length": 1152.7857666015625, + "completions/min_length": 1057.0, + "completions/min_terminated_length": 1057.0, + "epoch": 0.8359589897474369, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5094198907004315, + "kl": 0.00998687744140625, + "learning_rate": 1.71996791817611e-07, + "loss": -0.0279, + "num_tokens": 151367857.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0033040046691895, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10695791022092983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18545692704040115, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3343 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1263.25, + "completions/mean_terminated_length": 1155.6363525390625, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.8362090522630657, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9816312184448726, + "kl": 0.0163726806640625, + "learning_rate": 1.7178380780922961e-07, + "loss": -0.0674, + "num_tokens": 151422645.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0536880493164062, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12135579886957751, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08883171471749189, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3344 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1293.0, + "completions/mean_terminated_length": 1132.0, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.8364591147786947, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9061378924349732, + "kl": 0.016387939453125, + "learning_rate": 1.715711119890137e-07, + "loss": -0.0969, + "num_tokens": 151483965.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0186551809310913, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10318905058240753, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08881879484469966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10610965676722955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3345 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1390.0, + "completions/max_terminated_length": 1390.0, + "completions/mean_length": 1088.375, + "completions/mean_terminated_length": 1088.375, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.8367091772943236, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.932472668526012, + "kl": 0.018798828125, + "learning_rate": 1.7135870451903043e-07, + "loss": 0.0552, + "num_tokens": 151517459.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7196836471557617, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.032310978184877145, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04247728228812903, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3346 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1148.875, + "completions/mean_terminated_length": 989.2727661132812, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.8369592398099525, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.43441024378494, + "kl": 0.019744873046875, + "learning_rate": 1.711465855611271e-07, + "loss": 0.0124, + "num_tokens": 151564873.0, + "reward": 0.0, + "reward_std": 0.8655962944030762, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.36662263449587373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.41331249267326053, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3347 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1451.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1064.3125, + "completions/mean_terminated_length": 1064.3125, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.8372093023255814, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3286031070258244, + "kl": 0.0173492431640625, + "learning_rate": 1.7093475527693152e-07, + "loss": -0.0261, + "num_tokens": 151616126.0, + "reward": 0.0, + "reward_std": 1.0176938772201538, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011159135265478837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1396305595056811, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3348 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1311.4375, + "completions/mean_terminated_length": 1164.77783203125, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.8374593648412103, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.687607926388814, + "kl": 0.014923095703125, + "learning_rate": 1.7072321382785122e-07, + "loss": -0.0783, + "num_tokens": 151669205.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.03639554977417, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09956373464355076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1435234470134992, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3349 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1308.5, + "completions/mean_terminated_length": 1159.5555419921875, + "completions/min_length": 769.0, + "completions/min_terminated_length": 769.0, + "epoch": 0.8377094273568392, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.975077850451756, + "kl": 0.0131988525390625, + "learning_rate": 1.7051196137507374e-07, + "loss": -0.0215, + "num_tokens": 151721525.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0388867855072021, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08148412916268905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08108894735313112, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147854, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3350 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1479.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1057.8125, + "completions/mean_terminated_length": 1057.8125, + "completions/min_length": 750.0, + "completions/min_terminated_length": 750.0, + "epoch": 0.8379594898724682, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2362451933224237, + "kl": 0.021087646484375, + "learning_rate": 1.7030099807956647e-07, + "loss": 0.0429, + "num_tokens": 151752130.0, + "reward": 0.0, + "reward_std": 0.8963671922683716, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010261130079932314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03956289966202486, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194864, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3351 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1328.875, + "completions/mean_terminated_length": 1289.3846435546875, + "completions/min_length": 1097.0, + "completions/min_terminated_length": 1097.0, + "epoch": 0.838209552388097, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.222780436924989, + "kl": 0.0128631591796875, + "learning_rate": 1.700903241020764e-07, + "loss": 0.0238, + "num_tokens": 151794120.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0637335777282715, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00912507709474088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0717053405563714, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898339, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3352 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1463.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1224.1875, + "completions/mean_terminated_length": 1224.1875, + "completions/min_length": 818.0, + "completions/min_terminated_length": 818.0, + "epoch": 0.8384596149037259, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.356678253409891, + "kl": 0.0182647705078125, + "learning_rate": 1.6987993960313008e-07, + "loss": -0.0528, + "num_tokens": 151849499.0, + "reward": 0.0, + "reward_std": 0.9555983543395996, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08907270014614842, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0672190818531281, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3353 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1110.25, + "completions/mean_terminated_length": 1020.3077392578125, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.8387096774193549, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.681736264985847, + "kl": 0.0177459716796875, + "learning_rate": 1.6966984474303342e-07, + "loss": -0.0422, + "num_tokens": 151890607.0, + "reward": 0.0, + "reward_std": 0.944535493850708, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04903468314688008, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06669706734405029, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045818, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3354 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1192.0, + "completions/mean_length": 1000.3125, + "completions/mean_terminated_length": 967.0000610351562, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.8389597399349837, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1488878125726267, + "kl": 0.020111083984375, + "learning_rate": 1.69460039681872e-07, + "loss": -0.0036, + "num_tokens": 151934612.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9813454151153564, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2734123984245377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24257967405938144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3355 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1394.875, + "completions/mean_terminated_length": 1347.0909423828125, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.8392098024506126, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.640371518423714, + "kl": 0.016998291015625, + "learning_rate": 1.6925052457951006e-07, + "loss": -0.0196, + "num_tokens": 151982002.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0437510013580322, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046048675296376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12579864847718922, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3356 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1073.5, + "completions/mean_terminated_length": 1073.5, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.8394598649662416, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.136717003680305, + "kl": 0.0136871337890625, + "learning_rate": 1.690412995955911e-07, + "loss": -0.0077, + "num_tokens": 152020546.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.058415412902832, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006110036802556192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.073657195770894, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.093392838174146, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3357 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1457.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1082.875, + "completions/mean_terminated_length": 1082.875, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.8397099274818705, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.00248624516453, + "kl": 0.0145721435546875, + "learning_rate": 1.6883236488953778e-07, + "loss": -0.0355, + "num_tokens": 152065336.0, + "reward": 0.0, + "reward_std": 0.7811299562454224, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03857018845606091, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12005843065963642, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3358 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1261.0, + "completions/mean_length": 1124.0625, + "completions/mean_terminated_length": 1099.0, + "completions/min_length": 895.0, + "completions/min_terminated_length": 895.0, + "epoch": 0.8399599899974993, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.537804643425113, + "kl": 0.0267333984375, + "learning_rate": 1.6862372062055135e-07, + "loss": 0.0098, + "num_tokens": 152102001.0, + "reward": 0.0, + "reward_std": 0.796058714389801, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007790187951094784, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13597201512157772, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3359 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1206.0625, + "completions/mean_terminated_length": 1164.071533203125, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.8402100525131283, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8872350440126318, + "kl": 0.01677703857421875, + "learning_rate": 1.6841536694761178e-07, + "loss": 0.0136, + "num_tokens": 152144650.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.561803936958313, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058577591174175006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06211190561813455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3360 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1246.5625, + "completions/mean_terminated_length": 1162.0833740234375, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.8404601150287572, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2389537085353064, + "kl": 0.0098724365234375, + "learning_rate": 1.6820730402947763e-07, + "loss": 0.0031, + "num_tokens": 152188779.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9434800148010254, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3361 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1035.75, + "completions/mean_terminated_length": 1004.800048828125, + "completions/min_length": 301.0, + "completions/min_terminated_length": 301.0, + "epoch": 0.8407101775443862, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4735655012108753, + "kl": 0.02239990234375, + "learning_rate": 1.6799953202468597e-07, + "loss": 0.021, + "num_tokens": 152244631.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8816125392913818, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003529879031567074, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.014119516126268297, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3362 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1295.5, + "completions/mean_terminated_length": 1248.3077392578125, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.840960240060015, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3462586003450516, + "kl": 0.022064208984375, + "learning_rate": 1.677920510915521e-07, + "loss": -0.0059, + "num_tokens": 152289583.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9120301604270935, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08372979984481743, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11905314233198512, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3363 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1188.0, + "completions/mean_terminated_length": 1143.4285888671875, + "completions/min_length": 916.0, + "completions/min_terminated_length": 916.0, + "epoch": 0.8412103025756439, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3351061479351047, + "kl": 0.022613525390625, + "learning_rate": 1.6758486138816958e-07, + "loss": -0.0746, + "num_tokens": 152333959.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0393438339233398, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009154596547360215, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10959999078856077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3364 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1231.25, + "completions/mean_terminated_length": 1141.666748046875, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.8414603650912729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3526129355129095, + "kl": 0.020538330078125, + "learning_rate": 1.6737796307241018e-07, + "loss": 0.0059, + "num_tokens": 152377171.0, + "reward": 0.0, + "reward_std": 0.7966620326042175, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08091796752194588, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12665926807859956, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15396007178390023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3365 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1086.8125, + "completions/mean_terminated_length": 1086.8125, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.8417104276069017, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3084587303196837, + "kl": 0.0207977294921875, + "learning_rate": 1.6717135630192352e-07, + "loss": -0.0816, + "num_tokens": 152417008.0, + "reward": 0.0, + "reward_std": 0.8765095472335815, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.000570763276357379, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10524137734890121, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3366 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1290.0, + "completions/mean_length": 1275.1875, + "completions/mean_terminated_length": 1100.3333740234375, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.8419604901225306, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9961398761254285, + "kl": 0.0166168212890625, + "learning_rate": 1.6696504123413706e-07, + "loss": -0.0156, + "num_tokens": 152465435.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0131181478500366, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004456291566499234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.012762281022496144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3367 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1397.9375, + "completions/mean_terminated_length": 1295.875, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.8422105526381596, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6798234507815004, + "kl": 0.016571044921875, + "learning_rate": 1.6675901802625596e-07, + "loss": -0.0399, + "num_tokens": 152516906.0, + "reward": 0.0, + "reward_std": 0.7936362028121948, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17511683201171827, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09134159223707118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18358568490953675, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3368 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1204.625, + "completions/mean_terminated_length": 1162.4285888671875, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.8424606151537885, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.03187085273972, + "kl": 0.017242431640625, + "learning_rate": 1.6655328683526321e-07, + "loss": -0.0474, + "num_tokens": 152568956.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0117475986480713, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015760385785527735, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028927344683885522, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3369 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1207.9375, + "completions/mean_terminated_length": 1166.21435546875, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.8427106776694173, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2034243481988636, + "kl": 0.01690673828125, + "learning_rate": 1.6634784781791895e-07, + "loss": 0.0104, + "num_tokens": 152617251.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9964360594749451, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006361509075521891, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06425330160310021, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3370 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1271.8125, + "completions/mean_terminated_length": 1256.60009765625, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.8429607401850463, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8343394336360577, + "kl": 0.0176849365234375, + "learning_rate": 1.661427011307609e-07, + "loss": 0.0105, + "num_tokens": 152661496.0, + "reward": 0.0, + "reward_std": 1.0652432441711426, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0070372710428450265, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02956022686496505, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3371 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1394.0, + "completions/max_terminated_length": 1394.0, + "completions/mean_length": 1108.5625, + "completions/mean_terminated_length": 1108.5625, + "completions/min_length": 782.0, + "completions/min_terminated_length": 782.0, + "epoch": 0.8432108027006752, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3154086882707956, + "kl": 0.014892578125, + "learning_rate": 1.659378469301042e-07, + "loss": 0.0001, + "num_tokens": 152713801.0, + "reward": 1.862645149230957e-08, + "reward_std": 0.9403879046440125, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024506240920955456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14922947975633846, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3372 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1366.0, + "completions/mean_length": 1319.75, + "completions/mean_terminated_length": 1139.5, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.843460865216304, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.492120596555075, + "kl": 0.0139617919921875, + "learning_rate": 1.6573328537204093e-07, + "loss": -0.0003, + "num_tokens": 152775773.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.8637921810150146, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09646816747249448, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10391570895836347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3373 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1461.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 998.0625, + "completions/mean_terminated_length": 998.0625, + "completions/min_length": 595.0, + "completions/min_terminated_length": 595.0, + "epoch": 0.843710927731933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.322329467104341, + "kl": 0.0163116455078125, + "learning_rate": 1.6552901661244e-07, + "loss": 0.0157, + "num_tokens": 152815438.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0105340480804443, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0967816192099881, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15852051209239057, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3374 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1481.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1167.5, + "completions/mean_terminated_length": 1167.5, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.8439609902475619, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4110677191104735, + "kl": 0.02032470703125, + "learning_rate": 1.6532504080694742e-07, + "loss": 0.0008, + "num_tokens": 152851750.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0048493146896362, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027233661384134304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07376496296991886, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3375 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1449.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1225.25, + "completions/mean_terminated_length": 1225.25, + "completions/min_length": 1043.0, + "completions/min_terminated_length": 1043.0, + "epoch": 0.8442110527631908, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.661404324525637, + "kl": 0.018157958984375, + "learning_rate": 1.6512135811098613e-07, + "loss": -0.0203, + "num_tokens": 152893090.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8648989200592041, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0874692795361345, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1934558700377816, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3376 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1418.0, + "completions/mean_length": 1169.5625, + "completions/mean_terminated_length": 1122.357177734375, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.8444611152788197, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.068553263831378, + "kl": 0.0177459716796875, + "learning_rate": 1.6491796867975555e-07, + "loss": -0.0319, + "num_tokens": 152941755.0, + "reward": 0.0, + "reward_std": 0.7996862530708313, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010365301427780995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13589533860542563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3377 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1214.25, + "completions/mean_terminated_length": 1214.25, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.8447111777944486, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.378797304866942, + "kl": 0.02294921875, + "learning_rate": 1.6471487266823164e-07, + "loss": -0.0292, + "num_tokens": 152982767.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9473025798797607, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15680927689275395, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1230996079627716, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3378 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1243.125, + "completions/mean_terminated_length": 1243.125, + "completions/min_length": 1082.0, + "completions/min_terminated_length": 1082.0, + "epoch": 0.8449612403100775, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9931347128674062, + "kl": 0.0128173828125, + "learning_rate": 1.645120702311667e-07, + "loss": -0.0021, + "num_tokens": 153031873.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5821045637130737, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11622916791851474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12563619225269965, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3379 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1204.0, + "completions/max_terminated_length": 1204.0, + "completions/mean_length": 1062.9375, + "completions/mean_terminated_length": 1062.9375, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.8452113028257064, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3719714993712153, + "kl": 0.017791748046875, + "learning_rate": 1.643095615230896e-07, + "loss": -0.0339, + "num_tokens": 153079944.0, + "reward": 0.0, + "reward_std": 0.6870654821395874, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1537732618839729, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15323707383302385, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3380 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1256.625, + "completions/mean_terminated_length": 1175.5, + "completions/min_length": 1035.0, + "completions/min_terminated_length": 1035.0, + "epoch": 0.8454613653413353, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3423475844939343, + "kl": 0.0164031982421875, + "learning_rate": 1.6410734669830522e-07, + "loss": -0.0146, + "num_tokens": 153134858.0, + "reward": 0.0, + "reward_std": 0.9648892879486084, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04622210848072157, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05953330804122073, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3381 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1157.0625, + "completions/mean_terminated_length": 1108.071533203125, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.8457114278569643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9553912723203486, + "kl": 0.0164642333984375, + "learning_rate": 1.6390542591089435e-07, + "loss": -0.0307, + "num_tokens": 153178227.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5596414804458618, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05902210908384607, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12494008483961233, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3382 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1179.25, + "completions/mean_terminated_length": 1072.3333740234375, + "completions/min_length": 746.0, + "completions/min_terminated_length": 746.0, + "epoch": 0.8459614903725932, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.261535546090024, + "kl": 0.0201568603515625, + "learning_rate": 1.6370379931471417e-07, + "loss": -0.0199, + "num_tokens": 153235191.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7193119525909424, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012078962284503073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07268521070058635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189793, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3383 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1234.3125, + "completions/mean_terminated_length": 1196.357177734375, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.846211552888222, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2790230328680425, + "kl": 0.017791748046875, + "learning_rate": 1.6350246706339738e-07, + "loss": -0.0853, + "num_tokens": 153286156.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9944255352020264, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03489106741107114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04526849743672893, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512345, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3384 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1381.375, + "completions/mean_terminated_length": 1310.2000732421875, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.846461615403851, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1720544278056173, + "kl": 0.007171630859375, + "learning_rate": 1.6330142931035235e-07, + "loss": -0.0262, + "num_tokens": 153341138.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8529678583145142, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02723965165557415, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1420430765790504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0768596604689834, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3385 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1066.5625, + "completions/mean_terminated_length": 1066.5625, + "completions/min_length": 804.0, + "completions/min_terminated_length": 804.0, + "epoch": 0.8467116779194799, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.660004788190825, + "kl": 0.01666259765625, + "learning_rate": 1.6310068620876326e-07, + "loss": 0.0155, + "num_tokens": 153385955.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0217692852020264, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00746635165359022, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.069173184892951, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3386 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1177.9375, + "completions/mean_terminated_length": 1070.5833740234375, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.8469617404351087, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.403229447447892, + "kl": 0.01464080810546875, + "learning_rate": 1.629002379115897e-07, + "loss": 0.0014, + "num_tokens": 153444226.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9079053401947021, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05310296687266994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11883110515672463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6499999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3387 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1399.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1094.0625, + "completions/mean_terminated_length": 1094.0625, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.8472118029507377, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.196789836807425, + "kl": 0.0154266357421875, + "learning_rate": 1.6270008457156649e-07, + "loss": -0.0057, + "num_tokens": 153484075.0, + "reward": -5.21540641784668e-08, + "reward_std": 1.0615969896316528, + "rewards/wordcountpos_reward_GEOBench/mean": -5.21540641784668e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02776478824942396, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07151870892070497, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3388 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1195.0, + "completions/mean_terminated_length": 1195.0, + "completions/min_length": 721.0, + "completions/min_terminated_length": 721.0, + "epoch": 0.8474618654663666, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.068875480713184, + "kl": 0.0161895751953125, + "learning_rate": 1.6250022634120386e-07, + "loss": -0.037, + "num_tokens": 153534219.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6147984862327576, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14356702133719787, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11011940985718148, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3389 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1076.125, + "completions/mean_terminated_length": 1076.125, + "completions/min_length": 875.0, + "completions/min_terminated_length": 875.0, + "epoch": 0.8477119279819955, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0823543053394133, + "kl": 0.015350341796875, + "learning_rate": 1.6230066337278723e-07, + "loss": 0.0036, + "num_tokens": 153563885.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.994970440864563, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05111130027098357, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07984880213872673, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3390 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1369.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1088.8125, + "completions/mean_terminated_length": 1088.8125, + "completions/min_length": 777.0, + "completions/min_terminated_length": 777.0, + "epoch": 0.8479619904976244, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.507193559876226, + "kl": 0.026580810546875, + "learning_rate": 1.6210139581837698e-07, + "loss": -0.0537, + "num_tokens": 153610498.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7636800408363342, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04581941767617748, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04276093849293745, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3391 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1041.0, + "completions/mean_length": 1128.0625, + "completions/mean_terminated_length": 756.125, + "completions/min_length": 567.0, + "completions/min_terminated_length": 567.0, + "epoch": 0.8482120530132533, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0965703156281115, + "kl": 0.019622802734375, + "learning_rate": 1.6190242382980812e-07, + "loss": 0.033, + "num_tokens": 153650531.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9019227623939514, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07757819096621234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07107798951255152, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1593970119149271, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3392 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1058.6875, + "completions/mean_terminated_length": 1058.6875, + "completions/min_length": 643.0, + "completions/min_terminated_length": 643.0, + "epoch": 0.8484621155288822, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.572481098433864, + "kl": 0.02008056640625, + "learning_rate": 1.6170374755869095e-07, + "loss": 0.0134, + "num_tokens": 153695238.0, + "reward": 0.0, + "reward_std": 0.5739260911941528, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05792664552994947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.076745984752615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3393 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1308.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1125.0625, + "completions/mean_terminated_length": 1125.0625, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.8487121780445112, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.03846221733308, + "kl": 0.01507568359375, + "learning_rate": 1.6150536715641003e-07, + "loss": -0.0195, + "num_tokens": 153744207.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0142631530761719, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015572436037919031, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04299381407571272, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3394 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1127.4375, + "completions/mean_terminated_length": 1041.4615478515625, + "completions/min_length": 738.0, + "completions/min_terminated_length": 738.0, + "epoch": 0.84896224056014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0662799065471242, + "kl": 0.0190582275390625, + "learning_rate": 1.613072827741247e-07, + "loss": -0.0373, + "num_tokens": 153779510.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7951164841651917, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10422411245504025, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11546695632268794, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066474, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3395 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1095.0625, + "completions/mean_terminated_length": 1068.0667724609375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.8492123030757689, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2702065318910702, + "kl": 0.01983642578125, + "learning_rate": 1.611094945627687e-07, + "loss": 0.0635, + "num_tokens": 153814367.0, + "reward": 0.0, + "reward_std": 0.5954619646072388, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06509487578711483, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09688417872167149, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952499, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3396 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1110.25, + "completions/mean_terminated_length": 980.3333740234375, + "completions/min_length": 551.0, + "completions/min_terminated_length": 551.0, + "epoch": 0.8494623655913979, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.248995165272836, + "kl": 0.021697998046875, + "learning_rate": 1.6091200267304993e-07, + "loss": 0.0466, + "num_tokens": 153849595.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0349496603012085, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013462419727436071, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06207372961178985, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3397 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1208.5, + "completions/mean_terminated_length": 981.7777709960938, + "completions/min_length": 406.0, + "completions/min_terminated_length": 406.0, + "epoch": 0.8497124281070267, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.468073080376966, + "kl": 0.020751953125, + "learning_rate": 1.6071480725545085e-07, + "loss": -0.024, + "num_tokens": 153907195.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9018121957778931, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019924937119654002, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11147490259834203, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3398 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1312.75, + "completions/mean_terminated_length": 1300.2667236328125, + "completions/min_length": 1162.0, + "completions/min_terminated_length": 1162.0, + "epoch": 0.8499624906226556, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.915468284508749, + "kl": 0.0169677734375, + "learning_rate": 1.6051790846022754e-07, + "loss": -0.0139, + "num_tokens": 153952351.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0220069885253906, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041156932880515834, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06121464014744759, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3399 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1247.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1009.625, + "completions/mean_terminated_length": 1009.625, + "completions/min_length": 909.0, + "completions/min_terminated_length": 909.0, + "epoch": 0.8502125531382846, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.888616386729181, + "kl": 0.0106353759765625, + "learning_rate": 1.6032130643741064e-07, + "loss": -0.0263, + "num_tokens": 153984929.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0259289741516113, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011513149395057373, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062129219024084574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3400 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1013.5, + "completions/mean_terminated_length": 1013.5, + "completions/min_length": 603.0, + "completions/min_terminated_length": 603.0, + "epoch": 0.8504626156539135, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8197080683216895, + "kl": 0.032073974609375, + "learning_rate": 1.6012500133680417e-07, + "loss": -0.0584, + "num_tokens": 154025033.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9082266092300415, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007042716346184233, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02211101942273821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3401 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1348.0625, + "completions/mean_terminated_length": 1196.125, + "completions/min_length": 545.0, + "completions/min_terminated_length": 545.0, + "epoch": 0.8507126781695424, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6970694849835137, + "kl": 0.0141448974609375, + "learning_rate": 1.5992899330798619e-07, + "loss": -0.0286, + "num_tokens": 154076474.0, + "reward": 0.0, + "reward_std": 0.925402045249939, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033751479194976015, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0854543922666069, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3402 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1095.5625, + "completions/mean_terminated_length": 1068.60009765625, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.8509627406851713, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.813431530516285, + "kl": 0.019012451171875, + "learning_rate": 1.5973328250030817e-07, + "loss": -0.0072, + "num_tokens": 154129475.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9050154089927673, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09929683362962266, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12925545847982592, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3403 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1309.125, + "completions/mean_terminated_length": 1265.0770263671875, + "completions/min_length": 904.0, + "completions/min_terminated_length": 904.0, + "epoch": 0.8512128032008002, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.009572369330942, + "kl": 0.024627685546875, + "learning_rate": 1.595378690628954e-07, + "loss": -0.0072, + "num_tokens": 154172933.0, + "reward": 0.0, + "reward_std": 1.0436674356460571, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014282952433599984, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03632383669538415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0749073501808141, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3404 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1046.875, + "completions/mean_terminated_length": 1046.875, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.8514628657164292, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0164935621033506, + "kl": 0.018585205078125, + "learning_rate": 1.593427531446463e-07, + "loss": -0.0839, + "num_tokens": 154218531.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.94865882396698, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018486773880596274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10400108517000459, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3405 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1364.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1035.125, + "completions/mean_terminated_length": 1035.125, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.851712928232058, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7302831973804014, + "kl": 0.042877197265625, + "learning_rate": 1.591479348942328e-07, + "loss": 0.0204, + "num_tokens": 154265501.0, + "reward": 0.0, + "reward_std": 0.9500570893287659, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028136739419588372, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08661670037895872, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3406 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1362.0, + "completions/mean_length": 1030.875, + "completions/mean_terminated_length": 999.6000366210938, + "completions/min_length": 564.0, + "completions/min_terminated_length": 564.0, + "epoch": 0.8519629907476869, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0794227784469204, + "kl": 0.014892578125, + "learning_rate": 1.5895341446009972e-07, + "loss": -0.0575, + "num_tokens": 154310683.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7210105657577515, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10792002761362691, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.306600716331647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3407 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1055.5, + "completions/mean_terminated_length": 1055.5, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.8522130532633159, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6969555726966967, + "kl": 0.021820068359375, + "learning_rate": 1.587591919904655e-07, + "loss": 0.0189, + "num_tokens": 154356571.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9715641736984253, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006809252589510432, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0459369149868894, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3408 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1430.0, + "completions/max_terminated_length": 1430.0, + "completions/mean_length": 1125.625, + "completions/mean_terminated_length": 1125.625, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.8524631157789447, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2959071361156123, + "kl": 0.02044677734375, + "learning_rate": 1.585652676333211e-07, + "loss": 0.0419, + "num_tokens": 154389053.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9079267382621765, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022756666490524997, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07969606543213599, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3409 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1416.25, + "completions/mean_terminated_length": 1276.666748046875, + "completions/min_length": 1170.0, + "completions/min_terminated_length": 1170.0, + "epoch": 0.8527131782945736, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.38330036086136, + "kl": 0.022979736328125, + "learning_rate": 1.5837164153643013e-07, + "loss": 0.017, + "num_tokens": 154451953.0, + "reward": 0.0, + "reward_std": 0.7248988151550293, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036778499746062995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06135636987503772, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.65, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3410 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1197.0, + "completions/max_terminated_length": 1197.0, + "completions/mean_length": 1015.0625, + "completions/mean_terminated_length": 1015.0625, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.8529632408102026, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5705353643131703, + "kl": 0.02069091796875, + "learning_rate": 1.5817831384732965e-07, + "loss": 0.0189, + "num_tokens": 154488130.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9354285001754761, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0008061820968698792, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041023897257836325, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036263, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3411 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1105.5, + "completions/mean_terminated_length": 1079.2000732421875, + "completions/min_length": 582.0, + "completions/min_terminated_length": 582.0, + "epoch": 0.8532133033258315, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7552360143973105, + "kl": 0.0154876708984375, + "learning_rate": 1.5798528471332874e-07, + "loss": -0.092, + "num_tokens": 154538850.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0574856996536255, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19072692315643838, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19989211240365962, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666671, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3412 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1366.1875, + "completions/mean_terminated_length": 1321.5833740234375, + "completions/min_length": 1111.0, + "completions/min_terminated_length": 1111.0, + "epoch": 0.8534633658414603, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.811948072477752, + "kl": 0.0174713134765625, + "learning_rate": 1.577925542815092e-07, + "loss": -0.0052, + "num_tokens": 154588989.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9129635691642761, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012103321281422106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11976457984080754, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13763881881375054, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3413 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1271.9375, + "completions/mean_terminated_length": 1256.7333984375, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.8537134283570893, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.825546039625914, + "kl": 0.01739501953125, + "learning_rate": 1.5760012269872518e-07, + "loss": 0.015, + "num_tokens": 154639740.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9771131873130798, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011457358162067149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11615188331573294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3414 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1270.0, + "completions/max_terminated_length": 1270.0, + "completions/mean_length": 856.625, + "completions/mean_terminated_length": 856.625, + "completions/min_length": 690.0, + "completions/min_terminated_length": 690.0, + "epoch": 0.8539634908727182, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.226112736944573, + "kl": 0.009845733642578125, + "learning_rate": 1.5740799011160325e-07, + "loss": 0.0092, + "num_tokens": 154678734.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9533878564834595, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08907810081015362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10195331210369231, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3415 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1359.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 879.0, + "completions/mean_terminated_length": 879.0, + "completions/min_length": 475.0, + "completions/min_terminated_length": 475.0, + "epoch": 0.854213553388347, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5865213698283918, + "kl": 0.0169830322265625, + "learning_rate": 1.5721615666654199e-07, + "loss": -0.0355, + "num_tokens": 154718342.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7312920689582825, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16734111161403775, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2606736067951536, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17883366987645666, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3416 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1286.0, + "completions/max_terminated_length": 1286.0, + "completions/mean_length": 1124.5625, + "completions/mean_terminated_length": 1124.5625, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.854463615903976, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.295635405533307, + "kl": 0.021881103515625, + "learning_rate": 1.5702462250971197e-07, + "loss": -0.0058, + "num_tokens": 154766959.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5610868334770203, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1835776501625578, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08829837766836554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.061913918736689055, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3417 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1214.0625, + "completions/mean_terminated_length": 991.6666870117188, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.8547136784196049, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.550729592618446, + "kl": 0.022216796875, + "learning_rate": 1.568333877870561e-07, + "loss": -0.0222, + "num_tokens": 154814168.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.002830982208252, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04021387719904925, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14177256328558036, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869929, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3418 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1191.625, + "completions/mean_terminated_length": 1171.0667724609375, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.8549637409352339, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2523083877932555, + "kl": 0.018035888671875, + "learning_rate": 1.566424526442887e-07, + "loss": -0.0764, + "num_tokens": 154856914.0, + "reward": 0.0, + "reward_std": 0.9880672097206116, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0577952005422942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05054218412938844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668906, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3419 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1057.625, + "completions/mean_terminated_length": 1028.1334228515625, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.8552138034508627, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.562969343257601, + "kl": 0.0218505859375, + "learning_rate": 1.5645181722689617e-07, + "loss": -0.0559, + "num_tokens": 154905876.0, + "reward": 0.0, + "reward_std": 0.6675546169281006, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.041589836430960606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18587503389610466, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3420 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1135.3125, + "completions/mean_terminated_length": 1135.3125, + "completions/min_length": 763.0, + "completions/min_terminated_length": 763.0, + "epoch": 0.8554638659664916, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.989644877023943, + "kl": 0.0139007568359375, + "learning_rate": 1.5626148168013625e-07, + "loss": 0.0086, + "num_tokens": 154949257.0, + "reward": 0.0, + "reward_std": 0.6840847730636597, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16786561575572917, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10472187230588696, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3421 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1260.0, + "completions/mean_length": 1157.0, + "completions/mean_terminated_length": 1108.0, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.8557139284821206, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4742034227660743, + "kl": 0.017242431640625, + "learning_rate": 1.5607144614903828e-07, + "loss": 0.0264, + "num_tokens": 154997977.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8803448677062988, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049330938634121205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09083209053686099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3422 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1190.0, + "completions/mean_length": 1237.0625, + "completions/mean_terminated_length": 974.125, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.8559639909977494, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2055411901893196, + "kl": 0.01806640625, + "learning_rate": 1.5588171077840305e-07, + "loss": -0.0224, + "num_tokens": 155056890.0, + "reward": 0.0, + "reward_std": 0.8473159670829773, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.3381549314095608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07850626924872585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3423 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1201.75, + "completions/mean_terminated_length": 1181.86669921875, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.8562140535133783, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.659469408505165, + "kl": 0.0153656005859375, + "learning_rate": 1.5569227571280253e-07, + "loss": -0.043, + "num_tokens": 155096294.0, + "reward": 0.0, + "reward_std": 0.4951978325843811, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.034679696438516, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0330089820693625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1128748897706693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3424 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1159.0, + "completions/mean_length": 1247.75, + "completions/mean_terminated_length": 995.5, + "completions/min_length": 867.0, + "completions/min_terminated_length": 867.0, + "epoch": 0.8564641160290073, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3336099603760916, + "kl": 0.03228759765625, + "learning_rate": 1.5550314109658015e-07, + "loss": -0.0232, + "num_tokens": 155143410.0, + "reward": 0.0, + "reward_std": 0.49848324060440063, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0873112644091493, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16167492004356804, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1970147578604578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3425 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1126.3125, + "completions/mean_terminated_length": 1101.4000244140625, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.8567141785446362, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.284779221027052, + "kl": 0.00838470458984375, + "learning_rate": 1.5531430707385011e-07, + "loss": -0.036, + "num_tokens": 155188311.0, + "reward": 0.0, + "reward_std": 0.9389376640319824, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03237979305449484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10281395973449427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3426 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1319.0, + "completions/mean_length": 1163.5625, + "completions/mean_terminated_length": 1141.1334228515625, + "completions/min_length": 986.0, + "completions/min_terminated_length": 986.0, + "epoch": 0.856964241060265, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8692762110325902, + "kl": 0.0184783935546875, + "learning_rate": 1.551257737884975e-07, + "loss": -0.022, + "num_tokens": 155228296.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7833160161972046, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05990475301704476, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06781044574661481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3427 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1089.5625, + "completions/mean_terminated_length": 1089.5625, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.857214303575894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3965279617184234, + "kl": 0.0128326416015625, + "learning_rate": 1.5493754138417847e-07, + "loss": -0.0162, + "num_tokens": 155269105.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.018824577331543, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12649618604407986, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08324852174537112, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3428 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1203.6875, + "completions/mean_terminated_length": 1203.6875, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.8574643660915229, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6582077895972156, + "kl": 0.0157012939453125, + "learning_rate": 1.5474961000432002e-07, + "loss": -0.0386, + "num_tokens": 155321548.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0634846687316895, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.010328846230422316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045874721157192114, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3429 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1206.0, + "completions/max_terminated_length": 1206.0, + "completions/mean_length": 1078.1875, + "completions/mean_terminated_length": 1078.1875, + "completions/min_length": 765.0, + "completions/min_terminated_length": 765.0, + "epoch": 0.8577144286071517, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2398753926761987, + "kl": 0.015350341796875, + "learning_rate": 1.5456197979211946e-07, + "loss": -0.0021, + "num_tokens": 155363647.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.05415940284729, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.038237227065543125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08742732814883351, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14605934866804432, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3430 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1346.75, + "completions/mean_terminated_length": 1277.0909423828125, + "completions/min_length": 1167.0, + "completions/min_terminated_length": 1167.0, + "epoch": 0.8579644911227807, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.40476207399369, + "kl": 0.016265869140625, + "learning_rate": 1.5437465089054477e-07, + "loss": -0.0022, + "num_tokens": 155412115.0, + "reward": 0.0, + "reward_std": 0.6088378429412842, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.4753183908135026, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4199470098019532, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0938872452190116, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3431 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1386.3125, + "completions/mean_terminated_length": 1272.625, + "completions/min_length": 1104.0, + "completions/min_terminated_length": 1104.0, + "epoch": 0.8582145536384096, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.165983142630136, + "kl": 0.01093292236328125, + "learning_rate": 1.5418762344233457e-07, + "loss": 0.0265, + "num_tokens": 155472728.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.049360990524292, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036338942736396804, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12268300978527852, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568497, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3432 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1405.375, + "completions/mean_terminated_length": 1247.666748046875, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.8584646161540385, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.173992354956902, + "kl": 0.021728515625, + "learning_rate": 1.5400089758999733e-07, + "loss": 0.0056, + "num_tokens": 155528302.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6598861813545227, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.056809739566832176, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0634749997700752, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9166666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3433 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1244.0, + "completions/max_terminated_length": 1244.0, + "completions/mean_length": 1009.6875, + "completions/mean_terminated_length": 1009.6875, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.8587146786696674, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.936297464711222, + "kl": 0.0152130126953125, + "learning_rate": 1.53814473475812e-07, + "loss": 0.0717, + "num_tokens": 155574329.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0304120779037476, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04857297118519766, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16558458135410803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3434 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1490.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1054.75, + "completions/mean_terminated_length": 1054.75, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.8589647411852963, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0861240576379085, + "kl": 0.0154571533203125, + "learning_rate": 1.5362835124182744e-07, + "loss": -0.0473, + "num_tokens": 155621389.0, + "reward": 0.0, + "reward_std": 0.8532738089561462, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.061656370015554594, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08447931344925813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3435 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1489.6875, + "completions/mean_terminated_length": 1335.0, + "completions/min_length": 1335.0, + "completions/min_terminated_length": 1335.0, + "epoch": 0.8592148037009252, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.960079393096399, + "kl": 0.0121612548828125, + "learning_rate": 1.5344253102986283e-07, + "loss": -0.0081, + "num_tokens": 155682088.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0224840641021729, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00987405662115017, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030198712630124087, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07698003589195011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3436 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1364.0, + "completions/mean_length": 1119.5, + "completions/mean_terminated_length": 1031.6923828125, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.8594648662165542, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.300672023260348, + "kl": 0.02081298828125, + "learning_rate": 1.5325701298150684e-07, + "loss": -0.1162, + "num_tokens": 155727840.0, + "reward": 0.0, + "reward_std": 0.7386235594749451, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06926031726253133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12944761957270537, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886448, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3437 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1293.0, + "completions/max_terminated_length": 1293.0, + "completions/mean_length": 995.3125, + "completions/mean_terminated_length": 995.3125, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.859714928732183, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4679973045284194, + "kl": 0.0174102783203125, + "learning_rate": 1.5307179723811813e-07, + "loss": -0.0171, + "num_tokens": 155762301.0, + "reward": 0.0, + "reward_std": 0.9106250405311584, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005830620020852386, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08081689254851326, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07290277645477447, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3438 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1288.4375, + "completions/mean_terminated_length": 1274.3333740234375, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.859964991247812, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9290199819237492, + "kl": 0.01300048828125, + "learning_rate": 1.5288688394082494e-07, + "loss": 0.0023, + "num_tokens": 155803348.0, + "reward": 0.0, + "reward_std": 1.0320450067520142, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01737987315680116, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06805006006804212, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06666666666666668, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3439 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1076.125, + "completions/mean_terminated_length": 1047.86669921875, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.8602150537634409, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1549519638657126, + "kl": 0.020660400390625, + "learning_rate": 1.52702273230525e-07, + "loss": -0.0866, + "num_tokens": 155840614.0, + "reward": 1.862645149230957e-08, + "reward_std": 1.0163103342056274, + "rewards/wordcountpos_reward_GEOBench/mean": 1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016118416135546494, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17549315645444785, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11925695879998881, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3440 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1269.75, + "completions/mean_terminated_length": 1269.75, + "completions/min_length": 1005.0, + "completions/min_terminated_length": 1005.0, + "epoch": 0.8604651162790697, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.230002440275731, + "kl": 0.0116424560546875, + "learning_rate": 1.525179652478856e-07, + "loss": 0.0047, + "num_tokens": 155881802.0, + "reward": 0.0, + "reward_std": 0.9322949647903442, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00010856696546577942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.056530852837469836, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3441 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1131.875, + "completions/mean_terminated_length": 1107.3333740234375, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.8607151787946987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3018405753912554, + "kl": 0.023956298828125, + "learning_rate": 1.523339601333431e-07, + "loss": -0.0823, + "num_tokens": 155916792.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9589496850967407, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.20141713624996815, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09793124999700811, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3442 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1381.625, + "completions/mean_terminated_length": 1263.25, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.8609652413103276, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.668776773265062, + "kl": 0.0154571533203125, + "learning_rate": 1.521502580271036e-07, + "loss": 0.007, + "num_tokens": 155972418.0, + "reward": 0.0, + "reward_std": 0.8563692569732666, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022921722557824237, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0993603369752172, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3443 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1269.0, + "completions/mean_terminated_length": 1236.0, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.8612153038259565, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5636938232252695, + "kl": 0.013458251953125, + "learning_rate": 1.519668590691419e-07, + "loss": 0.0243, + "num_tokens": 156023450.0, + "reward": 0.0, + "reward_std": 0.8856779336929321, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03330940932355699, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07788041809447231, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3444 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1267.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 1022.875, + "completions/mean_terminated_length": 1022.875, + "completions/min_length": 835.0, + "completions/min_terminated_length": 835.0, + "epoch": 0.8614653663415854, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5371547323755372, + "kl": 0.0167388916015625, + "learning_rate": 1.5178376339920167e-07, + "loss": -0.0092, + "num_tokens": 156053248.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9299553632736206, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01824214126128632, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10995277105663488, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568498, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3445 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1401.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1093.625, + "completions/mean_terminated_length": 1093.625, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.8617154288572143, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.438337685369955, + "kl": 0.018890380859375, + "learning_rate": 1.5160097115679592e-07, + "loss": -0.0375, + "num_tokens": 156084250.0, + "reward": 0.0, + "reward_std": 0.4707871675491333, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04132583069374135, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.016193193674645237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3446 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1226.0625, + "completions/mean_terminated_length": 1013.0, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.8619654913728432, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.769539680634216, + "kl": 0.0130767822265625, + "learning_rate": 1.5141848248120637e-07, + "loss": -0.0349, + "num_tokens": 156131355.0, + "reward": 0.0, + "reward_std": 0.9245836734771729, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0065360626546769, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.020555676247137177, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3447 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1336.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1135.125, + "completions/mean_terminated_length": 1135.125, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.8622155538884722, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.312655337417302, + "kl": 0.02056884765625, + "learning_rate": 1.512362975114831e-07, + "loss": 0.002, + "num_tokens": 156174429.0, + "reward": 0.0, + "reward_std": 0.8823260068893433, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005997408646856613, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06145492322656509, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3448 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1174.8125, + "completions/mean_terminated_length": 1174.8125, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.862465616404101, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.553169468142158, + "kl": 0.01007080078125, + "learning_rate": 1.5105441638644517e-07, + "loss": 0.0203, + "num_tokens": 156229842.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9604398012161255, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03392866627946646, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.070888314484515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3449 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1017.0625, + "completions/mean_terminated_length": 1017.0625, + "completions/min_length": 598.0, + "completions/min_terminated_length": 598.0, + "epoch": 0.8627156789197299, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.726634511204085, + "kl": 0.015838623046875, + "learning_rate": 1.5087283924467982e-07, + "loss": -0.0653, + "num_tokens": 156262931.0, + "reward": 0.0, + "reward_std": 0.9686143398284912, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008462424145667665, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13535561083510447, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3450 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 1022.6875, + "completions/mean_terminated_length": 990.86669921875, + "completions/min_length": 662.0, + "completions/min_terminated_length": 662.0, + "epoch": 0.8629657414353589, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.031688682166694, + "kl": 0.0178375244140625, + "learning_rate": 1.5069156622454285e-07, + "loss": -0.0515, + "num_tokens": 156308614.0, + "reward": -1.1175870895385742e-08, + "reward_std": 0.9968280792236328, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028233068830697167, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08183022594438079, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237135, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3451 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1073.4375, + "completions/mean_terminated_length": 1012.5000610351562, + "completions/min_length": 648.0, + "completions/min_terminated_length": 648.0, + "epoch": 0.8632158039509877, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.166815356059844, + "kl": 0.0171051025390625, + "learning_rate": 1.5051059746415824e-07, + "loss": -0.0311, + "num_tokens": 156363341.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5247182846069336, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026171202379542034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17007135376544064, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16666666666666669, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3452 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1250.4375, + "completions/mean_terminated_length": 1214.7857666015625, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.8634658664666166, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.428639959303684, + "kl": 0.019073486328125, + "learning_rate": 1.5032993310141826e-07, + "loss": -0.0487, + "num_tokens": 156416572.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0553255081176758, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027644638478032706, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043759953596015075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3453 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1186.875, + "completions/mean_terminated_length": 1166.0, + "completions/min_length": 859.0, + "completions/min_terminated_length": 859.0, + "epoch": 0.8637159289822456, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.586309284283841, + "kl": 0.018341064453125, + "learning_rate": 1.501495732739831e-07, + "loss": -0.0045, + "num_tokens": 156461842.0, + "reward": 2.7939677238464355e-08, + "reward_std": 0.9672238826751709, + "rewards/wordcountpos_reward_GEOBench/mean": 2.7939677238464355e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01789365958566918, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034645335082759056, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3454 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1060.125, + "completions/mean_terminated_length": 997.2857666015625, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.8639659914978745, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.080902868976543, + "kl": 0.0189056396484375, + "learning_rate": 1.4996951811928094e-07, + "loss": 0.0007, + "num_tokens": 156500236.0, + "reward": 0.0, + "reward_std": 0.894850492477417, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00988451588843552, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028313746489776838, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16903867626692443, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3455 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1196.375, + "completions/mean_terminated_length": 1058.3636474609375, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.8642160540135033, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.539998133018119, + "kl": 0.01861572265625, + "learning_rate": 1.4978976777450786e-07, + "loss": -0.0035, + "num_tokens": 156553898.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0210528373718262, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004293487658677505, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.011901531823066493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1408308678285174, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3456 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1269.625, + "completions/mean_terminated_length": 1039.25, + "completions/min_length": 773.0, + "completions/min_terminated_length": 773.0, + "epoch": 0.8644661165291323, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.291574999304906, + "kl": 0.021514892578125, + "learning_rate": 1.496103223766276e-07, + "loss": -0.0517, + "num_tokens": 156602740.0, + "reward": 0.0, + "reward_std": 0.9660124778747559, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09448422058640711, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13782622275787573, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10886621079036349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3457 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1249.25, + "completions/mean_terminated_length": 1165.666748046875, + "completions/min_length": 935.0, + "completions/min_terminated_length": 935.0, + "epoch": 0.8647161790447612, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9873281711720177, + "kl": 0.014862060546875, + "learning_rate": 1.4943118206237162e-07, + "loss": -0.0053, + "num_tokens": 156649248.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6832360029220581, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.058128140345439144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08449340241382226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3458 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1242.75, + "completions/mean_terminated_length": 1157.0, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.8649662415603901, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7732491467228693, + "kl": 0.0154571533203125, + "learning_rate": 1.4925234696823879e-07, + "loss": 0.0413, + "num_tokens": 156690900.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0547828674316406, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01646628148556129, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.020392719004766354, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3459 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1329.9375, + "completions/mean_terminated_length": 1197.6666259765625, + "completions/min_length": 811.0, + "completions/min_terminated_length": 811.0, + "epoch": 0.865216304076019, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0141336813708017, + "kl": 0.01922607421875, + "learning_rate": 1.4907381723049566e-07, + "loss": 0.0464, + "num_tokens": 156745051.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.8614853024482727, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06145845071411732, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13438294521911834, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054747, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3460 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1258.0, + "completions/max_terminated_length": 1258.0, + "completions/mean_length": 916.125, + "completions/mean_terminated_length": 916.125, + "completions/min_length": 770.0, + "completions/min_terminated_length": 770.0, + "epoch": 0.8654663665916479, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.705403147231404, + "kl": 0.0179595947265625, + "learning_rate": 1.488955929851759e-07, + "loss": -0.0408, + "num_tokens": 156770373.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9042665958404541, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05404055480950348, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13529879069834364, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3461 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1170.0, + "completions/max_terminated_length": 1170.0, + "completions/mean_length": 967.8125, + "completions/mean_terminated_length": 967.8125, + "completions/min_length": 756.0, + "completions/min_terminated_length": 756.0, + "epoch": 0.8657164291072769, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.952236083573832, + "kl": 0.026580810546875, + "learning_rate": 1.4871767436808053e-07, + "loss": -0.0131, + "num_tokens": 156800434.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9249534606933594, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04782978849810102, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058621054181422994, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3462 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1314.5, + "completions/mean_terminated_length": 1203.2000732421875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.8659664916229057, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6863641286701303, + "kl": 0.0121002197265625, + "learning_rate": 1.4854006151477738e-07, + "loss": 0.015, + "num_tokens": 156847634.0, + "reward": 0.0, + "reward_std": 1.0478076934814453, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1094899208341546, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18252135611026138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3463 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1372.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 996.0625, + "completions/mean_terminated_length": 996.0625, + "completions/min_length": 461.0, + "completions/min_terminated_length": 461.0, + "epoch": 0.8662165541385346, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8449912899366905, + "kl": 0.022613525390625, + "learning_rate": 1.4836275456060174e-07, + "loss": 0.0013, + "num_tokens": 156893795.0, + "reward": 0.0, + "reward_std": 0.6381891369819641, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09084451523340102, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07307200010059066, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13492110177323527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3464 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1268.0625, + "completions/mean_terminated_length": 1128.9000244140625, + "completions/min_length": 929.0, + "completions/min_terminated_length": 929.0, + "epoch": 0.8664666166541636, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8880941078104834, + "kl": 0.0198974609375, + "learning_rate": 1.481857536406556e-07, + "loss": 0.0662, + "num_tokens": 156947356.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9534765481948853, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015015061956697327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14925763807638423, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054748, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3465 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1170.625, + "completions/mean_terminated_length": 1148.666748046875, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.8667166791697924, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.913082390454677, + "kl": 0.013916015625, + "learning_rate": 1.4800905888980765e-07, + "loss": -0.0104, + "num_tokens": 156990422.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.9215371012687683, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015743581731228133, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08475874720242155, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3466 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1363.0, + "completions/mean_length": 1124.4375, + "completions/mean_terminated_length": 1099.4000244140625, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.8669667416854213, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0538300541549552, + "kl": 0.023468017578125, + "learning_rate": 1.478326704426937e-07, + "loss": -0.0417, + "num_tokens": 157026437.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0576164722442627, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12998705746127687, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22998843337344296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238704, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3467 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1467.0, + "completions/mean_length": 1301.75, + "completions/mean_terminated_length": 1211.6363525390625, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.8672168042010503, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.927339959484837, + "kl": 0.0164031982421875, + "learning_rate": 1.4765658843371557e-07, + "loss": 0.005, + "num_tokens": 157082017.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.003920555114746, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03239379993074401, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08309673628622323, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238706, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3468 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1451.5625, + "completions/mean_terminated_length": 1370.8333740234375, + "completions/min_length": 1213.0, + "completions/min_terminated_length": 1213.0, + "epoch": 0.8674668667166792, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3519766424645305, + "kl": 0.0163421630859375, + "learning_rate": 1.474808129970421e-07, + "loss": -0.0017, + "num_tokens": 157138938.0, + "reward": 0.0, + "reward_std": 1.0561792850494385, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10970526693199097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10034107678998702, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07302967433402215, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3469 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1257.0625, + "completions/mean_terminated_length": 1146.6363525390625, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.867716929232308, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.166514240226506, + "kl": 0.0164337158203125, + "learning_rate": 1.4730534426660808e-07, + "loss": -0.0541, + "num_tokens": 157181763.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0087345838546753, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06942358140238905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13573229106232615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3470 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1457.0, + "completions/mean_length": 1195.6875, + "completions/mean_terminated_length": 959.0, + "completions/min_length": 685.0, + "completions/min_terminated_length": 685.0, + "epoch": 0.867966991747937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.246571263981479, + "kl": 0.0255126953125, + "learning_rate": 1.4713018237611515e-07, + "loss": 0.0129, + "num_tokens": 157223118.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9499805569648743, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020842092911601783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0346964176496588, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3471 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1240.3125, + "completions/mean_terminated_length": 1038.3333740234375, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.8682170542635659, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.978703500355158, + "kl": 0.01806640625, + "learning_rate": 1.4695532745903074e-07, + "loss": 0.0064, + "num_tokens": 157272299.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9617214202880859, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005681657398667488, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11036764466808077, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12164002752505568, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3472 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1482.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1245.3125, + "completions/mean_terminated_length": 1245.3125, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.8684671167791947, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8791676316078396, + "kl": 0.03076171875, + "learning_rate": 1.4678077964858844e-07, + "loss": -0.0119, + "num_tokens": 157321768.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9601256847381592, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021042773809373284, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1314834421940808, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3473 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1118.0, + "completions/mean_terminated_length": 888.7999877929688, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.8687171792948237, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1333155383429605, + "kl": 0.0140380859375, + "learning_rate": 1.466065390777879e-07, + "loss": 0.001, + "num_tokens": 157366040.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9816408753395081, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0318286711193682, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1459052892519792, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3474 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1455.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1266.4375, + "completions/mean_terminated_length": 1266.4375, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.8689672418104526, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.262521375483631, + "kl": 0.0097198486328125, + "learning_rate": 1.4643260587939468e-07, + "loss": -0.0285, + "num_tokens": 157416671.0, + "reward": 0.0, + "reward_std": 0.772428572177887, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13393272293355085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17017089931091306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3475 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1213.8125, + "completions/mean_terminated_length": 1194.7333984375, + "completions/min_length": 474.0, + "completions/min_terminated_length": 474.0, + "epoch": 0.8692173043260816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.831116359522649, + "kl": 0.01422119140625, + "learning_rate": 1.4625898018594006e-07, + "loss": -0.0442, + "num_tokens": 157464556.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.869741678237915, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0694192468232849, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12556040954983452, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382574, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3476 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1429.6875, + "completions/mean_terminated_length": 1218.75, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.8694673668417104, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.474029021821289, + "kl": 0.01184844970703125, + "learning_rate": 1.4608566212972095e-07, + "loss": 0.0052, + "num_tokens": 157527543.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9980025291442871, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011233412869382559, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03844097283224934, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3477 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1177.1875, + "completions/mean_terminated_length": 1030.45458984375, + "completions/min_length": 717.0, + "completions/min_terminated_length": 717.0, + "epoch": 0.8697174293573393, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.015809541316324, + "kl": 0.0174560546875, + "learning_rate": 1.459126518428001e-07, + "loss": -0.0011, + "num_tokens": 157578218.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8831762075424194, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16727472842260793, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13082038437523205, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3478 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1184.6875, + "completions/mean_terminated_length": 939.4444580078125, + "completions/min_length": 597.0, + "completions/min_terminated_length": 597.0, + "epoch": 0.8699674918729683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0439491982469247, + "kl": 0.018035888671875, + "learning_rate": 1.457399494570056e-07, + "loss": 0.1033, + "num_tokens": 157619965.0, + "reward": 0.0, + "reward_std": 0.7663299441337585, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1935042460801752, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08358741998517877, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1060223596263578, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3479 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1349.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1018.9375, + "completions/mean_terminated_length": 1018.9375, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.8702175543885972, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6213418474538925, + "kl": 0.0277099609375, + "learning_rate": 1.455675551039308e-07, + "loss": 0.0052, + "num_tokens": 157649108.0, + "reward": 2.60770320892334e-08, + "reward_std": 1.0528868436813354, + "rewards/wordcountpos_reward_GEOBench/mean": 2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03637123275428328, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.030511999851627395, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3480 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1189.8125, + "completions/mean_terminated_length": 1145.5, + "completions/min_length": 789.0, + "completions/min_terminated_length": 789.0, + "epoch": 0.870467616904226, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.677534300325152, + "kl": 0.019500732421875, + "learning_rate": 1.4539546891493462e-07, + "loss": -0.0698, + "num_tokens": 157704609.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5941810011863708, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016855685797298726, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0589963654381384, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1796601730428249, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3481 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1210.0, + "completions/mean_length": 998.875, + "completions/mean_terminated_length": 927.2857666015625, + "completions/min_length": 743.0, + "completions/min_terminated_length": 743.0, + "epoch": 0.870717679419855, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6203787599512722, + "kl": 0.01690673828125, + "learning_rate": 1.452236910211409e-07, + "loss": -0.091, + "num_tokens": 157758439.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6297163963317871, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0909199976587825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11195336659724996, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3482 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 992.9375, + "completions/mean_terminated_length": 959.1333618164062, + "completions/min_length": 606.0, + "completions/min_terminated_length": 606.0, + "epoch": 0.8709677419354839, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3471856019238175, + "kl": 0.0195465087890625, + "learning_rate": 1.450522215534387e-07, + "loss": -0.0239, + "num_tokens": 157803062.0, + "reward": 0.0, + "reward_std": 0.2555467486381531, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.297742351290703, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.29425695872233093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1704025734460517, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3483 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1373.625, + "completions/mean_terminated_length": 1275.3333740234375, + "completions/min_length": 1102.0, + "completions/min_terminated_length": 1102.0, + "epoch": 0.8712178044511127, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5683613729388215, + "kl": 0.0179901123046875, + "learning_rate": 1.44881060642482e-07, + "loss": -0.01, + "num_tokens": 157856752.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0640108585357666, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07451674411662287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07551375139900078, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408157, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3484 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1277.1875, + "completions/mean_terminated_length": 1245.357177734375, + "completions/min_length": 1010.0, + "completions/min_terminated_length": 1010.0, + "epoch": 0.8714678669667417, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9865515453936275, + "kl": 0.022613525390625, + "learning_rate": 1.4471020841868994e-07, + "loss": -0.0122, + "num_tokens": 157914907.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9057794809341431, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008321245960429156, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10383656011525455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333333, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3485 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1250.3125, + "completions/mean_terminated_length": 1000.625, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.8717179294823706, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2452039817844582, + "kl": 0.0165252685546875, + "learning_rate": 1.4453966501224603e-07, + "loss": -0.0366, + "num_tokens": 157950208.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8941279649734497, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08388271482893868, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13499350410616254, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3486 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1042.0, + "completions/mean_length": 1166.6875, + "completions/mean_terminated_length": 833.375, + "completions/min_length": 661.0, + "completions/min_terminated_length": 661.0, + "epoch": 0.8719679919979995, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.058252069127164, + "kl": 0.02008056640625, + "learning_rate": 1.4436943055309868e-07, + "loss": 0.0152, + "num_tokens": 158008027.0, + "reward": 0.0, + "reward_std": 0.6393119096755981, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004052927803591644, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1012647613237035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1767505042163692, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3487 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1112.6875, + "completions/mean_terminated_length": 1023.3077392578125, + "completions/min_length": 610.0, + "completions/min_terminated_length": 610.0, + "epoch": 0.8722180545136284, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7487822532719006, + "kl": 0.01392364501953125, + "learning_rate": 1.4419950517096099e-07, + "loss": -0.0646, + "num_tokens": 158050494.0, + "reward": 0.0, + "reward_std": 0.7686519622802734, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02197210183457949, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09127986009383082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3488 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1499.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1084.9375, + "completions/mean_terminated_length": 1084.9375, + "completions/min_length": 676.0, + "completions/min_terminated_length": 676.0, + "epoch": 0.8724681170292573, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8747046646497316, + "kl": 0.013641357421875, + "learning_rate": 1.4402988899531042e-07, + "loss": -0.0346, + "num_tokens": 158083405.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7263413667678833, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008249977079935923, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03299990831974369, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3489 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1247.0, + "completions/mean_length": 1226.375, + "completions/mean_terminated_length": 1062.2000732421875, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.8727181795448862, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1607042073734593, + "kl": 0.01849365234375, + "learning_rate": 1.4386058215538887e-07, + "loss": 0.0055, + "num_tokens": 158132299.0, + "reward": 0.0, + "reward_std": 0.9434891939163208, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.053680760354515376, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14761338512794406, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3490 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1210.5625, + "completions/mean_terminated_length": 1169.21435546875, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.8729682420605152, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8011001191203824, + "kl": 0.017181396484375, + "learning_rate": 1.4369158478020243e-07, + "loss": 0.0085, + "num_tokens": 158162044.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9108256101608276, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04312383896369179, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04097559847689276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3491 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1289.25, + "completions/mean_terminated_length": 1219.0, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.873218304576144, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.321105045763812, + "kl": 0.0192718505859375, + "learning_rate": 1.4352289699852161e-07, + "loss": -0.0223, + "num_tokens": 158207104.0, + "reward": 0.0, + "reward_std": 0.7628594040870667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09731346495268323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1437413113540175, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1238278374733781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3492 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1254.8125, + "completions/mean_terminated_length": 1198.2308349609375, + "completions/min_length": 1010.0, + "completions/min_terminated_length": 1010.0, + "epoch": 0.8734683670917729, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4152139968314854, + "kl": 0.019195556640625, + "learning_rate": 1.433545189388808e-07, + "loss": -0.0043, + "num_tokens": 158242589.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7985389828681946, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.21494530432235287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2642645018849367, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3493 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1106.3125, + "completions/mean_terminated_length": 1106.3125, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.8737184296074019, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.244563361252737, + "kl": 0.02325439453125, + "learning_rate": 1.4318645072957848e-07, + "loss": -0.0837, + "num_tokens": 158288874.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0320667028427124, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.060262269946745994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10024609687181489, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901858, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3494 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1349.4375, + "completions/mean_terminated_length": 1299.25, + "completions/min_length": 1142.0, + "completions/min_terminated_length": 1142.0, + "epoch": 0.8739684921230307, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2890176759291743, + "kl": 0.0121612548828125, + "learning_rate": 1.430186924986771e-07, + "loss": 0.0238, + "num_tokens": 158332713.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0502734184265137, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0908576302989078, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05635076250319478, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07187952884282611, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3495 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1092.0, + "completions/mean_length": 1281.3125, + "completions/mean_terminated_length": 1062.625, + "completions/min_length": 1049.0, + "completions/min_terminated_length": 1049.0, + "epoch": 0.8742185546386597, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2702841872021677, + "kl": 0.010986328125, + "learning_rate": 1.428512443740029e-07, + "loss": -0.0028, + "num_tokens": 158373262.0, + "reward": 0.0, + "reward_std": 0.5201235413551331, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02938473386888754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07633850717390715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3496 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1318.4375, + "completions/mean_terminated_length": 1235.9091796875, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.8744686171542886, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0751757120060117, + "kl": 0.019927978515625, + "learning_rate": 1.426841064831457e-07, + "loss": 0.0087, + "num_tokens": 158427501.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0472360849380493, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22414150240222097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08258079054606231, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3497 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1305.75, + "completions/mean_terminated_length": 1241.0, + "completions/min_length": 945.0, + "completions/min_terminated_length": 945.0, + "epoch": 0.8747186796699175, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.553192811116213, + "kl": 0.016204833984375, + "learning_rate": 1.4251727895345894e-07, + "loss": -0.0179, + "num_tokens": 158474489.0, + "reward": 0.0, + "reward_std": 0.5647265315055847, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02388725265872131, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.048085807114638855, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3498 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1094.0, + "completions/mean_terminated_length": 1094.0, + "completions/min_length": 801.0, + "completions/min_terminated_length": 801.0, + "epoch": 0.8749687421855464, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.756088383097317, + "kl": 0.025146484375, + "learning_rate": 1.423507619120598e-07, + "loss": -0.0141, + "num_tokens": 158525969.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.39274394512176514, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.22969366761822765, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18887577694661095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1759103306927835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3499 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1330.0, + "completions/max_terminated_length": 1330.0, + "completions/mean_length": 980.625, + "completions/mean_terminated_length": 980.625, + "completions/min_length": 714.0, + "completions/min_terminated_length": 714.0, + "epoch": 0.8752188047011753, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8942083471946973, + "kl": 0.01751708984375, + "learning_rate": 1.4218455548582872e-07, + "loss": -0.0289, + "num_tokens": 158557683.0, + "reward": 0.0, + "reward_std": 0.6151847839355469, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024781391881545767, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06266071848235594, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17126976771553507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3500 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1011.25, + "completions/mean_terminated_length": 978.666748046875, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.8754688672168042, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.409337381974777, + "kl": 0.016326904296875, + "learning_rate": 1.4201865980140932e-07, + "loss": -0.0968, + "num_tokens": 158595567.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6878235340118408, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03957422456513011, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10967959132125873, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3501 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1275.1875, + "completions/mean_terminated_length": 1260.2000732421875, + "completions/min_length": 995.0, + "completions/min_terminated_length": 995.0, + "epoch": 0.8757189297324331, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1497956577621613, + "kl": 0.0242919921875, + "learning_rate": 1.4185307498520877e-07, + "loss": -0.0427, + "num_tokens": 158634618.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7934025526046753, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011876997635747169, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06850566209294377, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3502 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1384.0, + "completions/mean_length": 1104.0625, + "completions/mean_terminated_length": 1077.666748046875, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.875968992248062, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.851627073737002, + "kl": 0.014373779296875, + "learning_rate": 1.4168780116339714e-07, + "loss": 0.0009, + "num_tokens": 158682387.0, + "reward": 0.0, + "reward_std": 0.5220016241073608, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11417056043464915, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1289496042982524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731401, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3503 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1185.6875, + "completions/mean_terminated_length": 1164.7333984375, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.8762190547636909, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.939942121248847, + "kl": 0.016082763671875, + "learning_rate": 1.415228384619076e-07, + "loss": 0.0194, + "num_tokens": 158717142.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0378139019012451, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03343550686021825, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06578561760655723, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3504 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1497.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1155.0, + "completions/mean_terminated_length": 1155.0, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.8764691172793199, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.302572609914831, + "kl": 0.01788330078125, + "learning_rate": 1.413581870064361e-07, + "loss": 0.021, + "num_tokens": 158769926.0, + "reward": 0.0, + "reward_std": 0.7022914886474609, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021536890154625433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07441826282741983, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504183, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3505 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1410.8125, + "completions/mean_terminated_length": 1296.1429443359375, + "completions/min_length": 1100.0, + "completions/min_terminated_length": 1100.0, + "epoch": 0.8767191797949487, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.472697081387219, + "kl": 0.011871337890625, + "learning_rate": 1.4119384692244176e-07, + "loss": 0.0342, + "num_tokens": 158830691.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0005381107330322, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.017325692789290092, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0889618228966775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07084150279686703, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3506 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1131.0, + "completions/max_terminated_length": 1131.0, + "completions/mean_length": 820.75, + "completions/mean_terminated_length": 820.75, + "completions/min_length": 571.0, + "completions/min_terminated_length": 571.0, + "epoch": 0.8769692423105776, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6841760643183865, + "kl": 0.02099609375, + "learning_rate": 1.410298183351462e-07, + "loss": 0.014, + "num_tokens": 158857967.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0336726903915405, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.018326345470269637, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0980200806779491, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655643, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3507 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 1105.0625, + "completions/mean_terminated_length": 1105.0625, + "completions/min_length": 932.0, + "completions/min_terminated_length": 932.0, + "epoch": 0.8772193048262066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9013631145689054, + "kl": 0.015899658203125, + "learning_rate": 1.408661013695338e-07, + "loss": -0.0165, + "num_tokens": 158905512.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0448307991027832, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014266083228096397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059728291159211626, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921946, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3508 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1227.625, + "completions/mean_terminated_length": 1227.625, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.8774693673418354, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2062845875930064, + "kl": 0.013031005859375, + "learning_rate": 1.4070269615035133e-07, + "loss": -0.0819, + "num_tokens": 158956066.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9388107061386108, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026492567164119123, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19359579615802144, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3509 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1235.6875, + "completions/mean_terminated_length": 1218.0667724609375, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.8777194298574643, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7683328213885816, + "kl": 0.017486572265625, + "learning_rate": 1.4053960280210822e-07, + "loss": -0.0244, + "num_tokens": 158995157.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9276263117790222, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.027880935244580837, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2539799237372688, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3510 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1208.5625, + "completions/mean_terminated_length": 1111.416748046875, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.8779694923730933, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1203391881953726, + "kl": 0.0211029052734375, + "learning_rate": 1.403768214490762e-07, + "loss": -0.0344, + "num_tokens": 159049606.0, + "reward": 0.0, + "reward_std": 1.0071167945861816, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0685468573618913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12002543336633237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369003, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3511 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1318.4375, + "completions/mean_terminated_length": 1292.5, + "completions/min_length": 1094.0, + "completions/min_terminated_length": 1094.0, + "epoch": 0.8782195548887222, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9529468886814896, + "kl": 0.020721435546875, + "learning_rate": 1.4021435221528903e-07, + "loss": -0.0015, + "num_tokens": 159097957.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9481773376464844, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09328029686584725, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11692532720668032, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3512 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1345.5625, + "completions/mean_terminated_length": 1225.4444580078125, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.878469617404351, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5044688211344757, + "kl": 0.014007568359375, + "learning_rate": 1.4005219522454323e-07, + "loss": -0.0109, + "num_tokens": 159155566.0, + "reward": 0.0, + "reward_std": 0.7107890844345093, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12380649570096673, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1690152663553098, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3513 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1232.3125, + "completions/mean_terminated_length": 1143.0833740234375, + "completions/min_length": 949.0, + "completions/min_terminated_length": 949.0, + "epoch": 0.87871967991998, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.062343034567933, + "kl": 0.020263671875, + "learning_rate": 1.3989035060039674e-07, + "loss": -0.0122, + "num_tokens": 159203923.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.511909008026123, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.23791728246855987, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0799469795379469, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3514 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1317.75, + "completions/mean_terminated_length": 1257.0, + "completions/min_length": 870.0, + "completions/min_terminated_length": 870.0, + "epoch": 0.8789697424356089, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.873635178599892, + "kl": 0.0177001953125, + "learning_rate": 1.3972881846616995e-07, + "loss": -0.0184, + "num_tokens": 159254599.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9416719675064087, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.18562499999999998, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3990817585407782, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3515 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1382.0, + "completions/mean_length": 1185.875, + "completions/mean_terminated_length": 1141.0, + "completions/min_length": 825.0, + "completions/min_terminated_length": 825.0, + "epoch": 0.8792198049512379, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.439366918776763, + "kl": 0.00952911376953125, + "learning_rate": 1.3956759894494497e-07, + "loss": 0.0389, + "num_tokens": 159303805.0, + "reward": 0.0, + "reward_std": 0.6550867557525635, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19954304608003487, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1841576788578978, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3516 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1073.125, + "completions/mean_terminated_length": 1073.125, + "completions/min_length": 846.0, + "completions/min_terminated_length": 846.0, + "epoch": 0.8794698674668667, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.359065522958409, + "kl": 0.0166778564453125, + "learning_rate": 1.3940669215956557e-07, + "loss": -0.0603, + "num_tokens": 159354799.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9310938119888306, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.030773880546804692, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05289350045351483, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12285191326386657, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3517 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1295.9375, + "completions/mean_terminated_length": 1295.9375, + "completions/min_length": 1011.0, + "completions/min_terminated_length": 1011.0, + "epoch": 0.8797199299824956, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8147589871222185, + "kl": 0.029876708984375, + "learning_rate": 1.3924609823263756e-07, + "loss": 0.0018, + "num_tokens": 159401254.0, + "reward": 0.0, + "reward_std": 0.7311854362487793, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049674994051451316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10765539334215933, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15723301886761007, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3518 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1357.9375, + "completions/mean_terminated_length": 1325.1539306640625, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.8799699924981246, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9271006580877135, + "kl": 0.020904541015625, + "learning_rate": 1.390858172865279e-07, + "loss": -0.0152, + "num_tokens": 159452469.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.43699154257774353, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09242500133883728, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14163522206595558, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13221755360572016, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3519 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1324.75, + "completions/mean_terminated_length": 1219.5999755859375, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.8802200550137534, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4597555252437653, + "kl": 0.0169677734375, + "learning_rate": 1.3892584944336578e-07, + "loss": 0.0026, + "num_tokens": 159509017.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0518686771392822, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0526955401147688, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10358733926276682, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11213417888437975, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3520 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1209.3125, + "completions/mean_terminated_length": 1142.2308349609375, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.8804701175293823, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8491666660463486, + "kl": 0.01666259765625, + "learning_rate": 1.3876619482504098e-07, + "loss": -0.0371, + "num_tokens": 159547214.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0544872283935547, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009618043216956428, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03719961380373495, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17126976771553507, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3521 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1316.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1051.875, + "completions/mean_terminated_length": 1051.875, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.8807201800450113, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5519572550712297, + "kl": 0.025299072265625, + "learning_rate": 1.386068535532051e-07, + "loss": 0.0164, + "num_tokens": 159590996.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.473701536655426, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09726913185818885, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1722142924691436, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1344398529978149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3522 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1317.9375, + "completions/mean_terminated_length": 1235.181884765625, + "completions/min_length": 983.0, + "completions/min_terminated_length": 983.0, + "epoch": 0.8809702425606402, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9579156486562013, + "kl": 0.0129241943359375, + "learning_rate": 1.38447825749271e-07, + "loss": 0.0223, + "num_tokens": 159628603.0, + "reward": 0.0, + "reward_std": 0.9003299474716187, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03698025993967101, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06338152683586933, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3523 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1260.875, + "completions/mean_terminated_length": 1226.71435546875, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.881220305076269, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.914004865577575, + "kl": 0.0160980224609375, + "learning_rate": 1.382891115344125e-07, + "loss": -0.0064, + "num_tokens": 159664113.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6682330369949341, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014584463167840688, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04754250991779245, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3524 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1357.0, + "completions/max_terminated_length": 1357.0, + "completions/mean_length": 1138.1875, + "completions/mean_terminated_length": 1138.1875, + "completions/min_length": 914.0, + "completions/min_terminated_length": 914.0, + "epoch": 0.881470367591898, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0101299752111528, + "kl": 0.0182952880859375, + "learning_rate": 1.3813071102956464e-07, + "loss": 0.0028, + "num_tokens": 159710812.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0462157726287842, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03720604915046832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08305760905107105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3525 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1367.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1028.5625, + "completions/mean_terminated_length": 1028.5625, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.8817204301075269, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.407488832300473, + "kl": 0.016082763671875, + "learning_rate": 1.3797262435542328e-07, + "loss": -0.0273, + "num_tokens": 159754053.0, + "reward": 0.0, + "reward_std": 1.0049893856048584, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05716185854324793, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10641062648729976, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13381856152046848, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3526 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1395.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1135.875, + "completions/mean_terminated_length": 1135.875, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.8819704926231557, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9260638264867325, + "kl": 0.021087646484375, + "learning_rate": 1.3781485163244513e-07, + "loss": 0.03, + "num_tokens": 159790899.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8345519304275513, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017725526703367855, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04374798461082271, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054751, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3527 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1182.125, + "completions/mean_terminated_length": 1160.933349609375, + "completions/min_length": 873.0, + "completions/min_terminated_length": 873.0, + "epoch": 0.8822205551387847, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3489279685750906, + "kl": 0.0149993896484375, + "learning_rate": 1.3765739298084792e-07, + "loss": 0.0528, + "num_tokens": 159833757.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9944886565208435, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007848807334831693, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16996406207058615, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3528 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1338.4375, + "completions/mean_terminated_length": 1212.77783203125, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.8824706176544136, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.61477200059242, + "kl": 0.0128631591796875, + "learning_rate": 1.375002485206097e-07, + "loss": 0.0187, + "num_tokens": 159878620.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0634708404541016, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.025701312173860437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05994270438436686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0910840068085298, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3529 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1188.25, + "completions/mean_terminated_length": 1188.25, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.8827206801700425, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.356150131582722, + "kl": 0.022216796875, + "learning_rate": 1.3734341837146963e-07, + "loss": -0.0229, + "num_tokens": 159924480.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.898999810218811, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016474395404358813, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0293873239778085, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3530 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1248.9375, + "completions/mean_terminated_length": 1232.2000732421875, + "completions/min_length": 1115.0, + "completions/min_terminated_length": 1115.0, + "epoch": 0.8829707426856714, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.297515616629857, + "kl": 0.018646240234375, + "learning_rate": 1.3718690265292676e-07, + "loss": 0.0038, + "num_tokens": 159970175.0, + "reward": 0.0, + "reward_std": 0.6260489821434021, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013034012682410952, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05847284761268363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3531 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1388.6875, + "completions/mean_terminated_length": 1245.571533203125, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.8832208052013003, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.381165167913092, + "kl": 0.0181427001953125, + "learning_rate": 1.370307014842411e-07, + "loss": -0.0478, + "num_tokens": 160022658.0, + "reward": 0.0, + "reward_std": 0.524826169013977, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13039640618712234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07976927553702597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13655822255780922, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3532 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1269.0, + "completions/mean_terminated_length": 1130.4000244140625, + "completions/min_length": 924.0, + "completions/min_terminated_length": 924.0, + "epoch": 0.8834708677169293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1175231055950414, + "kl": 0.0193328857421875, + "learning_rate": 1.368748149844326e-07, + "loss": 0.0116, + "num_tokens": 160067930.0, + "reward": 0.0, + "reward_std": 0.5842426419258118, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12972208358190615, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13900171756895408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3533 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1412.3125, + "completions/mean_terminated_length": 1266.166748046875, + "completions/min_length": 1014.0, + "completions/min_terminated_length": 1014.0, + "epoch": 0.8837209302325582, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8454541415283345, + "kl": 0.0157470703125, + "learning_rate": 1.3671924327228163e-07, + "loss": -0.0303, + "num_tokens": 160122487.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9036304950714111, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04458633898092737, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14064828003698995, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3534 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1290.6875, + "completions/mean_terminated_length": 1220.916748046875, + "completions/min_length": 863.0, + "completions/min_terminated_length": 863.0, + "epoch": 0.883970992748187, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.788082720372536, + "kl": 0.02093505859375, + "learning_rate": 1.365639864663287e-07, + "loss": 0.0131, + "num_tokens": 160167682.0, + "reward": 0.0, + "reward_std": 0.4681819677352905, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00948255511925099, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05264816572350207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3535 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1255.0, + "completions/max_terminated_length": 1255.0, + "completions/mean_length": 914.5, + "completions/mean_terminated_length": 914.5, + "completions/min_length": 701.0, + "completions/min_terminated_length": 701.0, + "epoch": 0.884221055263816, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.627261774265375, + "kl": 0.0267333984375, + "learning_rate": 1.3640904468487417e-07, + "loss": -0.0644, + "num_tokens": 160205402.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9560893774032593, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015217387093962205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09814744611845784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15869840952317446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3536 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 1077.875, + "completions/mean_terminated_length": 1017.5714721679688, + "completions/min_length": 559.0, + "completions/min_terminated_length": 559.0, + "epoch": 0.8844711177794449, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1182283577780465, + "kl": 0.0173797607421875, + "learning_rate": 1.3625441804597878e-07, + "loss": -0.093, + "num_tokens": 160248760.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9593243598937988, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.33685344059625366, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3273049885592967, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3537 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1328.625, + "completions/mean_terminated_length": 1157.25, + "completions/min_length": 1019.0, + "completions/min_terminated_length": 1019.0, + "epoch": 0.8847211802950737, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5200022173451675, + "kl": 0.014617919921875, + "learning_rate": 1.3610010666746286e-07, + "loss": -0.0064, + "num_tokens": 160297178.0, + "reward": 0.0, + "reward_std": 0.627517580986023, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18170866100715077, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09912981056339287, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298364, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3538 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1288.5625, + "completions/mean_terminated_length": 1258.357177734375, + "completions/min_length": 1082.0, + "completions/min_terminated_length": 1082.0, + "epoch": 0.8849712428107027, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.857850379241738, + "kl": 0.022125244140625, + "learning_rate": 1.3594611066690636e-07, + "loss": -0.0037, + "num_tokens": 160342363.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6004735231399536, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02972768479005567, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09663941201136592, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3539 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1150.0, + "completions/max_terminated_length": 1150.0, + "completions/mean_length": 859.375, + "completions/mean_terminated_length": 859.375, + "completions/min_length": 596.0, + "completions/min_terminated_length": 596.0, + "epoch": 0.8852213053263316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7211644218662867, + "kl": 0.026702880859375, + "learning_rate": 1.3579243016164917e-07, + "loss": -0.0797, + "num_tokens": 160366697.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9860749244689941, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014922980496595335, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02933702894990975, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3540 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1378.0, + "completions/max_terminated_length": 1378.0, + "completions/mean_length": 853.4375, + "completions/mean_terminated_length": 853.4375, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.8854713678419605, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2480405589566628, + "kl": 0.023590087890625, + "learning_rate": 1.356390652687908e-07, + "loss": -0.0501, + "num_tokens": 160395816.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9678257703781128, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01983949705816295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06076941036812793, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952505, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3541 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1152.5625, + "completions/mean_terminated_length": 1129.4000244140625, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.8857214303575894, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7543759744220537, + "kl": 0.0141143798828125, + "learning_rate": 1.354860161051902e-07, + "loss": -0.0381, + "num_tokens": 160430193.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9886905550956726, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045298598112605164, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07359364829919353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3542 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1269.375, + "completions/mean_terminated_length": 1131.0, + "completions/min_length": 655.0, + "completions/min_terminated_length": 655.0, + "epoch": 0.8859714928732183, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.995393649794989, + "kl": 0.02313232421875, + "learning_rate": 1.353332827874657e-07, + "loss": -0.0782, + "num_tokens": 160482687.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.8963152766227722, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13718101723807183, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12975591173333118, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.95, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.057089922571845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3543 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1296.0, + "completions/max_terminated_length": 1296.0, + "completions/mean_length": 1026.1875, + "completions/mean_terminated_length": 1026.1875, + "completions/min_length": 900.0, + "completions/min_terminated_length": 900.0, + "epoch": 0.8862215553888472, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2882836894003895, + "kl": 0.0188446044921875, + "learning_rate": 1.3518086543199502e-07, + "loss": -0.0751, + "num_tokens": 160532114.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0381067991256714, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029001979111389322, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07779003390239374, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3544 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1269.0, + "completions/max_terminated_length": 1269.0, + "completions/mean_length": 965.875, + "completions/mean_terminated_length": 965.875, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.8864716179044762, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6130281504860275, + "kl": 0.021392822265625, + "learning_rate": 1.3502876415491515e-07, + "loss": 0.0252, + "num_tokens": 160574864.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0333329439163208, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006782021774483966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06605244803346991, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3545 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1322.0, + "completions/mean_length": 1101.3125, + "completions/mean_terminated_length": 968.4166870117188, + "completions/min_length": 609.0, + "completions/min_terminated_length": 609.0, + "epoch": 0.886721680420105, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.299832743928895, + "kl": 0.019073486328125, + "learning_rate": 1.348769790721222e-07, + "loss": -0.0632, + "num_tokens": 160612893.0, + "reward": 0.0, + "reward_std": 1.050926685333252, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02206427535787823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08910690163178338, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804349, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3546 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1282.0, + "completions/mean_length": 1133.375, + "completions/mean_terminated_length": 1081.0, + "completions/min_length": 614.0, + "completions/min_terminated_length": 614.0, + "epoch": 0.8869717429357339, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.26909394770134, + "kl": 0.02069091796875, + "learning_rate": 1.347255102992712e-07, + "loss": -0.0088, + "num_tokens": 160669315.0, + "reward": -4.470348358154297e-08, + "reward_std": 0.9650551080703735, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019884166192684127, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08802806057291977, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3547 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1200.5625, + "completions/mean_terminated_length": 1157.7857666015625, + "completions/min_length": 815.0, + "completions/min_terminated_length": 815.0, + "epoch": 0.8872218054513629, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.146794604146985, + "kl": 0.025482177734375, + "learning_rate": 1.3457435795177653e-07, + "loss": 0.0201, + "num_tokens": 160712892.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.727325439453125, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006006243435311713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1041039341740131, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3548 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1371.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1079.4375, + "completions/mean_terminated_length": 1079.4375, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.8874718679669917, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.598080839400061, + "kl": 0.00997161865234375, + "learning_rate": 1.3442352214481124e-07, + "loss": -0.043, + "num_tokens": 160753931.0, + "reward": 0.0, + "reward_std": 0.7337296009063721, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08651260443472564, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12072296704888132, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3549 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1360.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1037.8125, + "completions/mean_terminated_length": 1037.8125, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.8877219304826206, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.281590631449273, + "kl": 0.0190582275390625, + "learning_rate": 1.3427300299330713e-07, + "loss": -0.006, + "num_tokens": 160805400.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0618432760238647, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04081906363630081, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04290463844691291, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3550 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1269.4375, + "completions/mean_terminated_length": 1269.4375, + "completions/min_length": 1017.0, + "completions/min_terminated_length": 1017.0, + "epoch": 0.8879719929982496, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1399881026158796, + "kl": 0.022216796875, + "learning_rate": 1.341228006119548e-07, + "loss": -0.04, + "num_tokens": 160852655.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5040320754051208, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023333856640284277, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1158063626097932, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06426219440409445, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3551 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1194.0, + "completions/mean_length": 1062.125, + "completions/mean_terminated_length": 999.5714721679688, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.8882220555138785, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0819957767351314, + "kl": 0.016815185546875, + "learning_rate": 1.3397291511520346e-07, + "loss": -0.0333, + "num_tokens": 160885657.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9128242135047913, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13780979758072068, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14193987397402025, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12758439472669758, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3552 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1316.0, + "completions/mean_length": 1246.8125, + "completions/mean_terminated_length": 1094.9000244140625, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.8884721180295074, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3490632931610227, + "kl": 0.0136871337890625, + "learning_rate": 1.3382334661726087e-07, + "loss": 0.0174, + "num_tokens": 160942174.0, + "reward": 0.0, + "reward_std": 0.7357656955718994, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02498098515459305, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06216912187301533, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16049691355057039, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3553 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1225.0, + "completions/max_terminated_length": 1225.0, + "completions/mean_length": 1028.4375, + "completions/mean_terminated_length": 1028.4375, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.8887221805451363, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1600494129588204, + "kl": 0.0200653076171875, + "learning_rate": 1.3367409523209326e-07, + "loss": -0.0139, + "num_tokens": 160981829.0, + "reward": 0.0, + "reward_std": 0.9353391528129578, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04294263638409704, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11518775535011651, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08027729719194868, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3554 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1472.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1054.0625, + "completions/mean_terminated_length": 1054.0625, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.8889722430607652, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.723974331318433, + "kl": 0.023468017578125, + "learning_rate": 1.3352516107342526e-07, + "loss": 0.0046, + "num_tokens": 161032286.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.832076907157898, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011288219221838664, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11522776107636191, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1167460047694551, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3555 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1240.3125, + "completions/mean_terminated_length": 1084.5, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.8892223055763941, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.354057973609401, + "kl": 0.0118255615234375, + "learning_rate": 1.3337654425473982e-07, + "loss": -0.0036, + "num_tokens": 161076499.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9981290102005005, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006607904790996943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04116169449370047, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125754, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3556 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1206.4375, + "completions/mean_terminated_length": 1138.6923828125, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.889472368092023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9671760492272035, + "kl": 0.016754150390625, + "learning_rate": 1.3322824488927773e-07, + "loss": 0.0352, + "num_tokens": 161121202.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4632115662097931, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04436309618489451, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11404969078924806, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3557 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1309.25, + "completions/mean_terminated_length": 1265.2308349609375, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.8897224306076519, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9010704184648053, + "kl": 0.01861572265625, + "learning_rate": 1.330802630900385e-07, + "loss": 0.0155, + "num_tokens": 161164934.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9927825927734375, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08322400096455858, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12600383288121456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.047919685895217376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3558 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1023.5625, + "completions/mean_terminated_length": 1023.5625, + "completions/min_length": 772.0, + "completions/min_terminated_length": 772.0, + "epoch": 0.8899724931232809, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.47522556477657, + "kl": 0.01837158203125, + "learning_rate": 1.3293259896977915e-07, + "loss": -0.0293, + "num_tokens": 161202535.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0474539995193481, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0380755507191433, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045395063089140306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3559 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1293.0, + "completions/mean_length": 1186.25, + "completions/mean_terminated_length": 1165.3333740234375, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.8902225556389097, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0617207467814276, + "kl": 0.00870513916015625, + "learning_rate": 1.3278525264101497e-07, + "loss": 0.0185, + "num_tokens": 161241803.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0552499294281006, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09170535911372843, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0742196691657697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3560 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1388.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1054.8125, + "completions/mean_terminated_length": 1054.8125, + "completions/min_length": 623.0, + "completions/min_terminated_length": 623.0, + "epoch": 0.8904726181545386, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7403868339959963, + "kl": 0.018310546875, + "learning_rate": 1.3263822421601878e-07, + "loss": -0.0159, + "num_tokens": 161293496.0, + "reward": 0.0, + "reward_std": 0.8264500498771667, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.052483935223950104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03738304027939481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3561 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1046.1875, + "completions/mean_terminated_length": 1046.1875, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.8907226806701676, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.395751726766096, + "kl": 0.02349853515625, + "learning_rate": 1.3249151380682151e-07, + "loss": -0.0043, + "num_tokens": 161345731.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9576758146286011, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015723636404359923, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0530726925936905, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14807405554629052, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3562 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1415.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1162.625, + "completions/mean_terminated_length": 1162.625, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.8909727431857964, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4206112453880886, + "kl": 0.015960693359375, + "learning_rate": 1.323451215252116e-07, + "loss": -0.0336, + "num_tokens": 161395133.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9596651196479797, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030835201967581435, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2294429046537533, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3563 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1198.625, + "completions/mean_terminated_length": 1098.166748046875, + "completions/min_length": 841.0, + "completions/min_terminated_length": 841.0, + "epoch": 0.8912228057014253, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.968519103564201, + "kl": 0.0185546875, + "learning_rate": 1.3219904748273497e-07, + "loss": -0.0421, + "num_tokens": 161443767.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9735491871833801, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06894800877251332, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1567286410436714, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15104573749303493, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3564 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1109.875, + "completions/mean_terminated_length": 1109.875, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.8914728682170543, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9159356889186934, + "kl": 0.0164642333984375, + "learning_rate": 1.320532917906953e-07, + "loss": 0.0036, + "num_tokens": 161484941.0, + "reward": 0.0, + "reward_std": 0.7087223529815674, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08361923848199437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08486897320903564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10027739304327549, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3565 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1323.0, + "completions/mean_length": 1146.75, + "completions/mean_terminated_length": 1123.2000732421875, + "completions/min_length": 779.0, + "completions/min_terminated_length": 779.0, + "epoch": 0.8917229307326832, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9856414074235054, + "kl": 0.01995849609375, + "learning_rate": 1.3190785456015357e-07, + "loss": 0.0113, + "num_tokens": 161518881.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8232381343841553, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05980345492172554, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054424005516772546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3566 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 1274.4375, + "completions/mean_terminated_length": 1099.0, + "completions/min_length": 876.0, + "completions/min_terminated_length": 876.0, + "epoch": 0.891972993248312, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7369292594494445, + "kl": 0.015533447265625, + "learning_rate": 1.3176273590192815e-07, + "loss": 0.0006, + "num_tokens": 161565896.0, + "reward": 0.0, + "reward_std": 0.96779465675354, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.040910017295453106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10902720271279383, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3567 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1478.0, + "completions/mean_length": 1348.75, + "completions/mean_terminated_length": 1258.0, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.892223055763941, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1626268304462655, + "kl": 0.024078369140625, + "learning_rate": 1.3161793592659458e-07, + "loss": 0.0032, + "num_tokens": 161620748.0, + "reward": 0.0, + "reward_std": 0.4546915590763092, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.18023618520420237, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21333055730346148, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3568 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1257.0, + "completions/max_terminated_length": 1257.0, + "completions/mean_length": 967.625, + "completions/mean_terminated_length": 967.625, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.8924731182795699, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3877533738222207, + "kl": 0.017425537109375, + "learning_rate": 1.3147345474448561e-07, + "loss": -0.0374, + "num_tokens": 161650790.0, + "reward": 0.0, + "reward_std": 0.7703821659088135, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09166957849937679, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20924607522538408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3569 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1174.5, + "completions/mean_terminated_length": 1152.800048828125, + "completions/min_length": 957.0, + "completions/min_terminated_length": 957.0, + "epoch": 0.8927231807951987, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5644629001797377, + "kl": 0.02020263671875, + "learning_rate": 1.3132929246569114e-07, + "loss": -0.0008, + "num_tokens": 161693966.0, + "reward": 0.0, + "reward_std": 0.9928744435310364, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03925776210203896, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1479479547394026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08933913745655642, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3570 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1185.1875, + "completions/mean_terminated_length": 1164.2000732421875, + "completions/min_length": 888.0, + "completions/min_terminated_length": 888.0, + "epoch": 0.8929732433108277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.034387375370565, + "kl": 0.020904541015625, + "learning_rate": 1.3118544920005791e-07, + "loss": 0.015, + "num_tokens": 161734697.0, + "reward": 1.1175870895385742e-08, + "reward_std": 1.0663858652114868, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03945628834415938, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14995498108413294, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3571 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1398.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1114.9375, + "completions/mean_terminated_length": 1114.9375, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.8932233058264566, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.606417498184678, + "kl": 0.02130126953125, + "learning_rate": 1.3104192505718993e-07, + "loss": -0.0155, + "num_tokens": 161792032.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9936434030532837, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.051986471931174824, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.028190837311032196, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387148, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3572 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1350.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1170.5625, + "completions/mean_terminated_length": 1170.5625, + "completions/min_length": 947.0, + "completions/min_terminated_length": 947.0, + "epoch": 0.8934733683420856, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2907553397739124, + "kl": 0.023345947265625, + "learning_rate": 1.308987201464477e-07, + "loss": 0.0389, + "num_tokens": 161821977.0, + "reward": 0.0, + "reward_std": 0.9788131713867188, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045736336663911116, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0823480887350111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186214, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3573 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1183.9375, + "completions/mean_terminated_length": 1111.0, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.8937234308577144, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.736829513835388, + "kl": 0.0151824951171875, + "learning_rate": 1.307558345769487e-07, + "loss": -0.0252, + "num_tokens": 161863408.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4785347878932953, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0417716963257939, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09024172169718551, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14782371884055637, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3574 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1451.0, + "completions/mean_length": 1294.625, + "completions/mean_terminated_length": 1265.2857666015625, + "completions/min_length": 1078.0, + "completions/min_terminated_length": 1078.0, + "epoch": 0.8939734933733433, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2042757770337578, + "kl": 0.01763916015625, + "learning_rate": 1.3061326845756667e-07, + "loss": -0.0051, + "num_tokens": 161916634.0, + "reward": 0.0, + "reward_std": 1.0108994245529175, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008700311915992038, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.016748170617825756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3575 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1249.625, + "completions/mean_terminated_length": 1191.84619140625, + "completions/min_length": 998.0, + "completions/min_terminated_length": 998.0, + "epoch": 0.8942235558889723, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.451019409175792, + "kl": 0.022247314453125, + "learning_rate": 1.3047102189693266e-07, + "loss": -0.0176, + "num_tokens": 161966932.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8440049886703491, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06982557131515697, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12396111193535751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1002773930432755, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3576 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1373.0, + "completions/mean_length": 1274.5625, + "completions/mean_terminated_length": 1139.300048828125, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.8944736184046012, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2146083873138167, + "kl": 0.024658203125, + "learning_rate": 1.3032909500343356e-07, + "loss": -0.0717, + "num_tokens": 162012029.0, + "reward": 0.0, + "reward_std": 1.0509064197540283, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0782916071486745, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04527005963249042, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3577 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1137.4375, + "completions/mean_terminated_length": 972.6364135742188, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.89472368092023, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.608134007055041, + "kl": 0.01959228515625, + "learning_rate": 1.30187487885213e-07, + "loss": 0.0369, + "num_tokens": 162070908.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7821747064590454, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04819438983557788, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06457235001029121, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08944271909999162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3578 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1146.125, + "completions/mean_terminated_length": 1146.125, + "completions/min_length": 970.0, + "completions/min_terminated_length": 970.0, + "epoch": 0.894973743435859, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.159893656551232, + "kl": 0.023193359375, + "learning_rate": 1.30046200650171e-07, + "loss": 0.0065, + "num_tokens": 162117406.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9259882569313049, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.023258940847862977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08219038848267314, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3579 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1279.875, + "completions/mean_terminated_length": 1248.4285888671875, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.8952238059514879, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3029842232769933, + "kl": 0.021392822265625, + "learning_rate": 1.2990523340596362e-07, + "loss": -0.0226, + "num_tokens": 162172964.0, + "reward": 0.0, + "reward_std": 1.0017611980438232, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009019634836237788, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07540860803274715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3580 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1411.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1177.875, + "completions/mean_terminated_length": 1177.875, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.8954738684671167, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2675257095869625, + "kl": 0.0160980224609375, + "learning_rate": 1.2976458626000328e-07, + "loss": -0.0549, + "num_tokens": 162219442.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9740040302276611, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013266000881551127, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03891370881888095, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1753303759784389, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3581 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1447.0625, + "completions/mean_terminated_length": 1379.0001220703125, + "completions/min_length": 1049.0, + "completions/min_terminated_length": 1049.0, + "epoch": 0.8957239309827457, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4256254224428364, + "kl": 0.0159149169921875, + "learning_rate": 1.2962425931945833e-07, + "loss": -0.0176, + "num_tokens": 162277163.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7929416298866272, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017873908497733598, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022608333329802756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952502, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3582 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1295.0625, + "completions/mean_terminated_length": 1247.769287109375, + "completions/min_length": 1060.0, + "completions/min_terminated_length": 1060.0, + "epoch": 0.8959739934983746, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.405230636362876, + "kl": 0.013397216796875, + "learning_rate": 1.2948425269125333e-07, + "loss": -0.002, + "num_tokens": 162321364.0, + "reward": 0.0, + "reward_std": 0.5876642465591431, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08286639844035075, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10709785780421237, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3583 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1371.0, + "completions/mean_length": 1049.9375, + "completions/mean_terminated_length": 1019.9334106445312, + "completions/min_length": 677.0, + "completions/min_terminated_length": 677.0, + "epoch": 0.8962240560140035, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8307849967121896, + "kl": 0.0172882080078125, + "learning_rate": 1.293445664820687e-07, + "loss": -0.0019, + "num_tokens": 162362891.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9323052167892456, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03913759947029939, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08444349801620865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3584 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1320.875, + "completions/mean_terminated_length": 1239.45458984375, + "completions/min_length": 975.0, + "completions/min_terminated_length": 975.0, + "epoch": 0.8964741185296324, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8326024434067323, + "kl": 0.0157318115234375, + "learning_rate": 1.2920520079834058e-07, + "loss": -0.0045, + "num_tokens": 162403457.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9458730220794678, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.34671957102551315, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18136098283936158, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08255189164891871, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3585 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1229.25, + "completions/mean_terminated_length": 1190.571533203125, + "completions/min_length": 925.0, + "completions/min_terminated_length": 925.0, + "epoch": 0.8967241810452613, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7519202574296657, + "kl": 0.0176849365234375, + "learning_rate": 1.2906615574626113e-07, + "loss": -0.0159, + "num_tokens": 162437029.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7835686206817627, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04128359646751115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05897210248037544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3586 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1332.9375, + "completions/mean_terminated_length": 1203.0, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.8969742435608902, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.841052601702869, + "kl": 0.02142333984375, + "learning_rate": 1.2892743143177793e-07, + "loss": 0.0041, + "num_tokens": 162489156.0, + "reward": 0.0, + "reward_std": 0.9272683262825012, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06755416588281551, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1371344396386832, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3587 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1250.0, + "completions/max_terminated_length": 1250.0, + "completions/mean_length": 1031.0, + "completions/mean_terminated_length": 1031.0, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.8972243060765192, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.412582117069374, + "kl": 0.022216796875, + "learning_rate": 1.287890279605943e-07, + "loss": -0.021, + "num_tokens": 162536524.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0535669326782227, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02926951054135181, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08909814406858564, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3588 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1411.0, + "completions/mean_length": 1401.8125, + "completions/mean_terminated_length": 1325.4444580078125, + "completions/min_length": 1168.0, + "completions/min_terminated_length": 1168.0, + "epoch": 0.897474368592148, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.188400769535831, + "kl": 0.02166748046875, + "learning_rate": 1.286509454381691e-07, + "loss": 0.008, + "num_tokens": 162602129.0, + "reward": 0.0, + "reward_std": 0.9861205816268921, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024389553897262193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034073770164091285, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3589 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1206.3125, + "completions/mean_terminated_length": 1186.7333984375, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.897724431107777, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2345491084401314, + "kl": 0.0195159912109375, + "learning_rate": 1.285131839697167e-07, + "loss": -0.0099, + "num_tokens": 162648870.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5311387181282043, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01686044050181519, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07706083503585348, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3590 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1342.6875, + "completions/mean_terminated_length": 1248.300048828125, + "completions/min_length": 1052.0, + "completions/min_terminated_length": 1052.0, + "epoch": 0.8979744936234059, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2149716903114687, + "kl": 0.022064208984375, + "learning_rate": 1.2837574366020676e-07, + "loss": 0.0266, + "num_tokens": 162694041.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9456653594970703, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019686329435734326, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1375725202349855, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13655822255780922, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3591 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1279.0, + "completions/mean_length": 1199.9375, + "completions/mean_terminated_length": 1063.5455322265625, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.8982245561390347, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.668421562234479, + "kl": 0.017913818359375, + "learning_rate": 1.282386246143641e-07, + "loss": 0.0195, + "num_tokens": 162734448.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8360947370529175, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0018999159724956242, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04237098368010498, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11155467020454342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3592 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1346.0, + "completions/max_terminated_length": 1346.0, + "completions/mean_length": 1044.0625, + "completions/mean_terminated_length": 1044.0625, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.8984746186546637, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.665115530609258, + "kl": 0.01397705078125, + "learning_rate": 1.2810182693666893e-07, + "loss": -0.0525, + "num_tokens": 162776641.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.0184303522109985, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01182379927044149, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06981489404539953, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3593 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1381.0, + "completions/mean_length": 1215.3125, + "completions/mean_terminated_length": 1120.416748046875, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.8987246811702926, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8631056629843465, + "kl": 0.02484130859375, + "learning_rate": 1.2796535073135662e-07, + "loss": -0.049, + "num_tokens": 162813990.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7975665330886841, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036092924731368006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10525865538462424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3594 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1443.9375, + "completions/mean_terminated_length": 1371.857177734375, + "completions/min_length": 1302.0, + "completions/min_terminated_length": 1302.0, + "epoch": 0.8989747436859215, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9620929068339066, + "kl": 0.006603240966796875, + "learning_rate": 1.2782919610241734e-07, + "loss": 0.0059, + "num_tokens": 162866525.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8365291357040405, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007109065863723267, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11085096368909846, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06191391873668904, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3595 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1035.0, + "completions/mean_terminated_length": 1035.0, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.8992248062015504, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.76564868326899, + "kl": 0.021728515625, + "learning_rate": 1.2769336315359653e-07, + "loss": -0.0062, + "num_tokens": 162906565.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.7047804594039917, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12330403738792405, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18907385660498427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3596 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1168.0, + "completions/max_terminated_length": 1168.0, + "completions/mean_length": 941.625, + "completions/mean_terminated_length": 941.625, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.8994748687171793, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6597606354113084, + "kl": 0.015228271484375, + "learning_rate": 1.275578519883944e-07, + "loss": 0.0377, + "num_tokens": 162950295.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4103986918926239, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02289719102984474, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08886871726486847, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3597 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1292.0, + "completions/mean_length": 1181.875, + "completions/mean_terminated_length": 1160.666748046875, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.8997249312328082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8864651074189127, + "kl": 0.013580322265625, + "learning_rate": 1.2742266271006585e-07, + "loss": -0.0042, + "num_tokens": 162993213.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9090452194213867, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.032250748750914354, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1887430953729899, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3598 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1348.0625, + "completions/mean_terminated_length": 1297.416748046875, + "completions/min_length": 819.0, + "completions/min_terminated_length": 819.0, + "epoch": 0.8999749937484371, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8865659208367433, + "kl": 0.0154876708984375, + "learning_rate": 1.2728779542162057e-07, + "loss": -0.0172, + "num_tokens": 163042782.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9568639993667603, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026620863276974327, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034675947199271695, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970787, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3599 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1401.0, + "completions/mean_length": 1267.75, + "completions/mean_terminated_length": 1214.1539306640625, + "completions/min_length": 764.0, + "completions/min_terminated_length": 764.0, + "epoch": 0.900225056264066, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9332038363516117, + "kl": 0.0164031982421875, + "learning_rate": 1.271532502258231e-07, + "loss": -0.0582, + "num_tokens": 163100770.0, + "reward": 0.0, + "reward_std": 0.29104292392730713, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0629779843765324, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06590420129132167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15869840952317446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3600 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1226.125, + "completions/mean_terminated_length": 952.25, + "completions/min_length": 631.0, + "completions/min_terminated_length": 631.0, + "epoch": 0.9004751187796949, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5665361280915646, + "kl": 0.0164794921875, + "learning_rate": 1.2701902722519234e-07, + "loss": 0.0224, + "num_tokens": 163135436.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9986288547515869, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13522536520142286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07415875020014809, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3601 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1254.1875, + "completions/mean_terminated_length": 1172.25, + "completions/min_length": 786.0, + "completions/min_terminated_length": 786.0, + "epoch": 0.9007251812953239, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7433631576951076, + "kl": 0.0145111083984375, + "learning_rate": 1.2688512652200176e-07, + "loss": 0.0328, + "num_tokens": 163187527.0, + "reward": 0.0, + "reward_std": 0.4208694100379944, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1423028707557892, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1563057872677145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3602 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1097.25, + "completions/mean_terminated_length": 1070.4000244140625, + "completions/min_length": 666.0, + "completions/min_terminated_length": 666.0, + "epoch": 0.9009752438109527, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.408024224967045, + "kl": 0.02203369140625, + "learning_rate": 1.2675154821827928e-07, + "loss": 0.0, + "num_tokens": 163223787.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6293579339981079, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.016120390972836726, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09802561059290535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14900907255500823, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3603 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1246.0, + "completions/mean_length": 1023.875, + "completions/mean_terminated_length": 992.1333618164062, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.9012253063265816, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.783364351190139, + "kl": 0.01116943359375, + "learning_rate": 1.2661829241580705e-07, + "loss": -0.0591, + "num_tokens": 163262065.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6782119870185852, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.35112931186598645, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4022560838395606, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06763190130459204, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3604 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1197.0, + "completions/mean_length": 963.5625, + "completions/mean_terminated_length": 927.800048828125, + "completions/min_length": 688.0, + "completions/min_terminated_length": 688.0, + "epoch": 0.9014753688422106, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2528973374346095, + "kl": 0.02215576171875, + "learning_rate": 1.2648535921612156e-07, + "loss": -0.0505, + "num_tokens": 163314146.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.6708420515060425, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10601662829308178, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1653020514593023, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3605 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1340.8125, + "completions/mean_terminated_length": 1245.300048828125, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.9017254313578394, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0655883246569067, + "kl": 0.018463134765625, + "learning_rate": 1.2635274872051352e-07, + "loss": 0.0427, + "num_tokens": 163374239.0, + "reward": 0.0, + "reward_std": 1.0282553434371948, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015496554811280894, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03523206926856795, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3606 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1226.375, + "completions/mean_terminated_length": 1135.166748046875, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.9019754938734683, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.302238251517701, + "kl": 0.019500732421875, + "learning_rate": 1.2622046103002775e-07, + "loss": 0.0218, + "num_tokens": 163417589.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0392258167266846, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06550682400379125, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08166128047820638, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3607 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1395.0, + "completions/mean_length": 1189.4375, + "completions/mean_terminated_length": 1117.769287109375, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.9022255563890973, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.856012136234197, + "kl": 0.01708984375, + "learning_rate": 1.2608849624546306e-07, + "loss": -0.0352, + "num_tokens": 163465700.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8152775168418884, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08260041304700166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13149116403106653, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3608 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1480.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1196.4375, + "completions/mean_terminated_length": 1196.4375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.9024756189047262, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.32309504451428, + "kl": 0.0153961181640625, + "learning_rate": 1.2595685446737229e-07, + "loss": -0.0113, + "num_tokens": 163514987.0, + "reward": 0.0, + "reward_std": 0.8872237205505371, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11027115004372968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2226526679713512, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09803627446568496, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3609 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1321.0, + "completions/max_terminated_length": 1321.0, + "completions/mean_length": 893.375, + "completions/mean_terminated_length": 893.375, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.9027256814203551, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9521398777293864, + "kl": 0.0159149169921875, + "learning_rate": 1.258255357960619e-07, + "loss": -0.0512, + "num_tokens": 163558601.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9762018322944641, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03519169044896205, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11861020835480447, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3610 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1400.0, + "completions/mean_length": 1052.125, + "completions/mean_terminated_length": 1022.2667236328125, + "completions/min_length": 668.0, + "completions/min_terminated_length": 668.0, + "epoch": 0.902975743935984, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2521979664177065, + "kl": 0.023345947265625, + "learning_rate": 1.2569454033159253e-07, + "loss": -0.0413, + "num_tokens": 163596931.0, + "reward": -1.6763806343078613e-08, + "reward_std": 1.0608930587768555, + "rewards/wordcountpos_reward_GEOBench/mean": -1.6763806343078613e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046159073995836325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04936208870704707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11674600476945511, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3611 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1011.375, + "completions/mean_terminated_length": 978.800048828125, + "completions/min_length": 671.0, + "completions/min_terminated_length": 671.0, + "epoch": 0.9032258064516129, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6318966369074595, + "kl": 0.019866943359375, + "learning_rate": 1.255638681737783e-07, + "loss": 0.0101, + "num_tokens": 163629137.0, + "reward": 0.0, + "reward_std": 0.8456045389175415, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04806768146427881, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.061321320368035656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3612 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1308.0, + "completions/mean_length": 1037.0625, + "completions/mean_terminated_length": 970.9285888671875, + "completions/min_length": 703.0, + "completions/min_terminated_length": 703.0, + "epoch": 0.9034758689672419, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2106677289770054, + "kl": 0.0130157470703125, + "learning_rate": 1.2543351942218704e-07, + "loss": -0.1091, + "num_tokens": 163671586.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9997631311416626, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08099394491076423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10915772279674464, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1330552655993129, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3613 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1340.0, + "completions/max_terminated_length": 1340.0, + "completions/mean_length": 991.0625, + "completions/mean_terminated_length": 991.0625, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.9037259314828707, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5610609407539093, + "kl": 0.02874755859375, + "learning_rate": 1.2530349417614035e-07, + "loss": -0.0236, + "num_tokens": 163705627.0, + "reward": 0.0, + "reward_std": 0.8505828380584717, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11406649715695948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09373024301212124, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1080980350662545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3614 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1226.0, + "completions/max_terminated_length": 1226.0, + "completions/mean_length": 812.9375, + "completions/mean_terminated_length": 812.9375, + "completions/min_length": 628.0, + "completions/min_terminated_length": 628.0, + "epoch": 0.9039759939984996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2137979263673517, + "kl": 0.0159149169921875, + "learning_rate": 1.2517379253471291e-07, + "loss": -0.0497, + "num_tokens": 163746978.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7886738777160645, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03490902804051585, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0745969377797724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1837873166945363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3615 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1399.0, + "completions/mean_length": 1388.1875, + "completions/mean_terminated_length": 1276.375, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.9042260565141286, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.223578614677759, + "kl": 0.024627685546875, + "learning_rate": 1.250444145967331e-07, + "loss": 0.0008, + "num_tokens": 163794437.0, + "reward": 0.0, + "reward_std": 0.97353196144104, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10097705862055663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10804674500003772, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3616 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 1033.5, + "completions/mean_terminated_length": 1033.5, + "completions/min_length": 824.0, + "completions/min_terminated_length": 824.0, + "epoch": 0.9044761190297574, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3907851351844576, + "kl": 0.023193359375, + "learning_rate": 1.2491536046078257e-07, + "loss": -0.0079, + "num_tokens": 163832317.0, + "reward": 0.0, + "reward_std": 1.0105458498001099, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016935583025870275, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.029211351814649913, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3617 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1199.5625, + "completions/mean_terminated_length": 1130.2308349609375, + "completions/min_length": 754.0, + "completions/min_terminated_length": 754.0, + "epoch": 0.9047261815453863, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9958169552002594, + "kl": 0.01971435546875, + "learning_rate": 1.247866302251965e-07, + "loss": -0.0486, + "num_tokens": 163872102.0, + "reward": 0.0, + "reward_std": 0.605796217918396, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.015102728018840423, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06207061354278604, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1514742369000235, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3618 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1077.0, + "completions/max_terminated_length": 1077.0, + "completions/mean_length": 847.125, + "completions/mean_terminated_length": 847.125, + "completions/min_length": 723.0, + "completions/min_terminated_length": 723.0, + "epoch": 0.9049762440610153, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1902348469400947, + "kl": 0.0086822509765625, + "learning_rate": 1.2465822398806287e-07, + "loss": -0.0058, + "num_tokens": 163914096.0, + "reward": 0.0, + "reward_std": 0.812868058681488, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10805541575247535, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20270373468822342, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13381856152046848, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3619 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1205.875, + "completions/mean_terminated_length": 1107.8333740234375, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.9052263065766442, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.327719000185695, + "kl": 0.0192718505859375, + "learning_rate": 1.2453014184722295e-07, + "loss": -0.0708, + "num_tokens": 163960918.0, + "reward": 0.0, + "reward_std": 0.9743466377258301, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07293047303408458, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1501622570692563, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414601, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3620 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1488.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1079.625, + "completions/mean_terminated_length": 1079.625, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.905476369092273, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6172448048913535, + "kl": 0.019866943359375, + "learning_rate": 1.244023839002712e-07, + "loss": -0.0418, + "num_tokens": 164001424.0, + "reward": 0.0, + "reward_std": 0.7619771957397461, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07639487823218502, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12714691889103394, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12345339501504503, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3621 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1251.0, + "completions/mean_length": 1027.8125, + "completions/mean_terminated_length": 960.357177734375, + "completions/min_length": 707.0, + "completions/min_terminated_length": 707.0, + "epoch": 0.905726431607902, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.29411853530354, + "kl": 0.01849365234375, + "learning_rate": 1.242749502445548e-07, + "loss": -0.0546, + "num_tokens": 164032517.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.886413037776947, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.2482702819530146, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19386108610844013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3622 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1335.4375, + "completions/mean_terminated_length": 1297.4615478515625, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.9059764941235309, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.027821704079173, + "kl": 0.0167999267578125, + "learning_rate": 1.2414784097717406e-07, + "loss": -0.0545, + "num_tokens": 164082532.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0367540121078491, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04052280457450827, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07610704663666021, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.04849589520621159, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3623 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1227.9375, + "completions/mean_terminated_length": 1104.272705078125, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.9062265566391597, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2250314911650793, + "kl": 0.018524169921875, + "learning_rate": 1.2402105619498186e-07, + "loss": -0.0162, + "num_tokens": 164135723.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0159612894058228, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02520833839182911, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15924733208011918, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3624 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1299.1875, + "completions/mean_terminated_length": 1285.800048828125, + "completions/min_length": 1114.0, + "completions/min_terminated_length": 1114.0, + "epoch": 0.9064766191547887, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9566913194305555, + "kl": 0.01580810546875, + "learning_rate": 1.238945959945842e-07, + "loss": -0.0071, + "num_tokens": 164182014.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7446556687355042, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013115951579192497, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06957529393909004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820636, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3625 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1318.0, + "completions/mean_length": 997.75, + "completions/mean_terminated_length": 964.2667236328125, + "completions/min_length": 784.0, + "completions/min_terminated_length": 784.0, + "epoch": 0.9067266816704176, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2420812403396564, + "kl": 0.017364501953125, + "learning_rate": 1.2376846047233943e-07, + "loss": -0.0601, + "num_tokens": 164217674.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9895612001419067, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02534782632689208, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07587454770072671, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3626 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1383.0, + "completions/mean_length": 1024.25, + "completions/mean_terminated_length": 992.5333862304688, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.9069767441860465, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.451679477894927, + "kl": 0.017852783203125, + "learning_rate": 1.236426497243586e-07, + "loss": -0.1122, + "num_tokens": 164256446.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9923255443572998, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0055823301350714435, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03716352758004033, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639735, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3627 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1361.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1059.5625, + "completions/mean_terminated_length": 1059.5625, + "completions/min_length": 613.0, + "completions/min_terminated_length": 613.0, + "epoch": 0.9072268067016754, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3249028698388243, + "kl": 0.022796630859375, + "learning_rate": 1.2351716384650545e-07, + "loss": -0.025, + "num_tokens": 164301407.0, + "reward": 0.0, + "reward_std": 0.8633238077163696, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14009046044529333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1744589933753213, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3628 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1356.6875, + "completions/mean_terminated_length": 1172.4285888671875, + "completions/min_length": 933.0, + "completions/min_terminated_length": 933.0, + "epoch": 0.9074768692173043, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4316185759240114, + "kl": 0.0150909423828125, + "learning_rate": 1.2339200293439588e-07, + "loss": 0.0084, + "num_tokens": 164348898.0, + "reward": -2.2351741790771484e-08, + "reward_std": 0.9892346858978271, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0759705812337776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10240933584553813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1029203215725281, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3629 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1343.625, + "completions/mean_terminated_length": 1291.5, + "completions/min_length": 1038.0, + "completions/min_terminated_length": 1038.0, + "epoch": 0.9077269317329333, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4349817892046635, + "kl": 0.016937255859375, + "learning_rate": 1.2326716708339845e-07, + "loss": 0.0305, + "num_tokens": 164401492.0, + "reward": 0.0, + "reward_std": 0.7984222769737244, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01711377420864891, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027102627154493367, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059628479399994376, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3630 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1412.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1090.125, + "completions/mean_terminated_length": 1090.125, + "completions/min_length": 668.0, + "completions/min_terminated_length": 668.0, + "epoch": 0.9079769942485622, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2094280060861027, + "kl": 0.023345947265625, + "learning_rate": 1.2314265638863389e-07, + "loss": -0.0162, + "num_tokens": 164455014.0, + "reward": 0.0, + "reward_std": 0.8804277777671814, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09484168496137377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06723384282696161, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901863, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3631 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1118.8125, + "completions/mean_terminated_length": 1118.8125, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.908227056764191, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5227741907656656, + "kl": 0.0222625732421875, + "learning_rate": 1.2301847094497527e-07, + "loss": 0.0215, + "num_tokens": 164503411.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0516479015350342, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028875866840698608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11802264604383515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3632 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1404.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1095.75, + "completions/mean_terminated_length": 1095.75, + "completions/min_length": 653.0, + "completions/min_terminated_length": 653.0, + "epoch": 0.90847711927982, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1353471484693665, + "kl": 0.019012451171875, + "learning_rate": 1.228946108470477e-07, + "loss": -0.0422, + "num_tokens": 164545327.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9494290351867676, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.33081355433710685, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.359947602967969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408158, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3633 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1425.0, + "completions/mean_length": 1248.5, + "completions/mean_terminated_length": 1164.666748046875, + "completions/min_length": 1000.0, + "completions/min_terminated_length": 1000.0, + "epoch": 0.9087271817954489, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4098805965989807, + "kl": 0.0106048583984375, + "learning_rate": 1.2277107618922842e-07, + "loss": -0.0064, + "num_tokens": 164585767.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5449643135070801, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08549247515119929, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10775606534757544, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928168, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3634 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1287.25, + "completions/mean_terminated_length": 1159.5999755859375, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.9089772443110777, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8800100675316846, + "kl": 0.0217437744140625, + "learning_rate": 1.2264786706564682e-07, + "loss": 0.0008, + "num_tokens": 164632763.0, + "reward": 0.0, + "reward_std": 0.8095055818557739, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0034961092053927036, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15502843530250707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3635 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 1334.5625, + "completions/mean_terminated_length": 1169.125, + "completions/min_length": 1094.0, + "completions/min_terminated_length": 1094.0, + "epoch": 0.9092273068267067, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7582401675531143, + "kl": 0.0185394287109375, + "learning_rate": 1.2252498357018418e-07, + "loss": -0.0061, + "num_tokens": 164691220.0, + "reward": 0.0, + "reward_std": 1.0145350694656372, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1335813293954375, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13172464268298056, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3636 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1473.3125, + "completions/mean_terminated_length": 1428.8333740234375, + "completions/min_length": 1307.0, + "completions/min_terminated_length": 1307.0, + "epoch": 0.9094773693423356, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7287212225342756, + "kl": 0.014984130859375, + "learning_rate": 1.224024257964735e-07, + "loss": -0.0021, + "num_tokens": 164743601.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6756376028060913, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03414347694592076, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0637824160275433, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3637 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1090.25, + "completions/mean_terminated_length": 1062.933349609375, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.9097274318579645, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7992321384674184, + "kl": 0.0130615234375, + "learning_rate": 1.222801938378999e-07, + "loss": 0.0098, + "num_tokens": 164792893.0, + "reward": 0.0, + "reward_std": 0.8565899133682251, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06603457148900355, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10072921407666353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3638 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1177.0, + "completions/mean_length": 1314.0625, + "completions/mean_terminated_length": 1004.1666870117188, + "completions/min_length": 871.0, + "completions/min_terminated_length": 871.0, + "epoch": 0.9099774943735934, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4805987623400876, + "kl": 0.0130767822265625, + "learning_rate": 1.2215828778759994e-07, + "loss": -0.0364, + "num_tokens": 164826310.0, + "reward": 0.0, + "reward_std": 0.9788527488708496, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.056797899127790374, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08743013238711607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3639 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1113.6875, + "completions/mean_terminated_length": 1087.933349609375, + "completions/min_length": 710.0, + "completions/min_terminated_length": 710.0, + "epoch": 0.9102275568892223, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.478674454210866, + "kl": 0.018646240234375, + "learning_rate": 1.22036707738462e-07, + "loss": -0.0316, + "num_tokens": 164879425.0, + "reward": 0.0, + "reward_std": 0.597262978553772, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017153432142461525, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11494627081873163, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3640 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1098.9375, + "completions/mean_terminated_length": 1072.2000732421875, + "completions/min_length": 702.0, + "completions/min_terminated_length": 702.0, + "epoch": 0.9104776194048512, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.344101431918225, + "kl": 0.0123748779296875, + "learning_rate": 1.2191545378312608e-07, + "loss": 0.0011, + "num_tokens": 164921744.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6007506847381592, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.27577609956327476, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14049351989114017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3641 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1391.0, + "completions/mean_length": 1291.25, + "completions/mean_terminated_length": 1128.888916015625, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.9107276819204801, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.974997517501814, + "kl": 0.019744873046875, + "learning_rate": 1.217945260139836e-07, + "loss": -0.0419, + "num_tokens": 164966868.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9911636114120483, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08653771957623371, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08894021886974013, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3642 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1312.0, + "completions/mean_length": 1279.5625, + "completions/mean_terminated_length": 1179.3636474609375, + "completions/min_length": 1088.0, + "completions/min_terminated_length": 1088.0, + "epoch": 0.910977744436109, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0659356796827844, + "kl": 0.023040771484375, + "learning_rate": 1.216739245231776e-07, + "loss": -0.0052, + "num_tokens": 165016253.0, + "reward": 0.0, + "reward_std": 0.7091052532196045, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004315088032536781, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1015300077806463, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18413964105375955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3643 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1449.0, + "completions/mean_length": 1281.75, + "completions/mean_terminated_length": 1250.571533203125, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.9112278069517379, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.985758260758289, + "kl": 0.01995849609375, + "learning_rate": 1.2155364940260236e-07, + "loss": -0.0411, + "num_tokens": 165069961.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8724949359893799, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03149392312039575, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4071209375652838, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3644 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1388.1875, + "completions/mean_terminated_length": 1337.3636474609375, + "completions/min_length": 1160.0, + "completions/min_terminated_length": 1160.0, + "epoch": 0.9114778694673669, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.870732891384861, + "kl": 0.01708984375, + "learning_rate": 1.2143370074390348e-07, + "loss": 0.018, + "num_tokens": 165133644.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.735543429851532, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0014665016236185113, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04316644196704216, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16683325008322933, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3645 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1404.0, + "completions/mean_length": 1249.6875, + "completions/mean_terminated_length": 1055.0, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.9117279319829957, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9561075049729064, + "kl": 0.020172119140625, + "learning_rate": 1.2131407863847786e-07, + "loss": -0.0018, + "num_tokens": 165168151.0, + "reward": 0.0, + "reward_std": 1.0060992240905762, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13633570315588003, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.26045587526287295, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823629, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3646 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1228.8125, + "completions/mean_terminated_length": 1190.071533203125, + "completions/min_length": 880.0, + "completions/min_terminated_length": 880.0, + "epoch": 0.9119779944986247, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8341343475970397, + "kl": 0.018524169921875, + "learning_rate": 1.2119478317747363e-07, + "loss": 0.0098, + "num_tokens": 165211924.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9845577478408813, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.029414492329391754, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06446807172611664, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.094182643679026, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3647 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1171.6875, + "completions/mean_terminated_length": 1171.6875, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.9122280570142536, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4673052837153704, + "kl": 0.0198974609375, + "learning_rate": 1.2107581445178983e-07, + "loss": -0.0312, + "num_tokens": 165243615.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9066362380981445, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3648 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1303.8125, + "completions/mean_terminated_length": 1258.5384521484375, + "completions/min_length": 968.0, + "completions/min_terminated_length": 968.0, + "epoch": 0.9124781195298824, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8105451595673605, + "kl": 0.019683837890625, + "learning_rate": 1.209571725520769e-07, + "loss": -0.0087, + "num_tokens": 165284956.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9635767936706543, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07095709230210194, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039191793361292096, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12583057392117916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3649 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1347.0, + "completions/max_terminated_length": 1347.0, + "completions/mean_length": 1044.75, + "completions/mean_terminated_length": 1044.75, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.9127281820455114, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9595058428784, + "kl": 0.024169921875, + "learning_rate": 1.208388575687359e-07, + "loss": 0.0309, + "num_tokens": 165315304.0, + "reward": 0.0, + "reward_std": 1.0555014610290527, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03664477865184567, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0378465646517481, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3650 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 1007.75, + "completions/mean_terminated_length": 974.9334106445312, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.9129782445611403, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.617124970018278, + "kl": 0.01080322265625, + "learning_rate": 1.2072086959191881e-07, + "loss": 0.0009, + "num_tokens": 165361388.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7379844188690186, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04599835480892887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16328116593251715, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14343665526661614, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3651 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1232.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 985.4375, + "completions/mean_terminated_length": 985.4375, + "completions/min_length": 719.0, + "completions/min_terminated_length": 719.0, + "epoch": 0.9132283070767692, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.965344181481015, + "kl": 0.016571044921875, + "learning_rate": 1.2060320871152864e-07, + "loss": -0.082, + "num_tokens": 165408595.0, + "reward": 0.0, + "reward_std": 0.5503830909729004, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007331141974758361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0715308574947356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3652 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1476.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1075.625, + "completions/mean_terminated_length": 1075.625, + "completions/min_length": 673.0, + "completions/min_terminated_length": 673.0, + "epoch": 0.9134783695923981, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.615316127438917, + "kl": 0.0156402587890625, + "learning_rate": 1.2048587501721916e-07, + "loss": -0.0181, + "num_tokens": 165447133.0, + "reward": 0.0, + "reward_std": 0.773940920829773, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07155317734258537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10320694497667576, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114841, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3653 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1339.0, + "completions/mean_length": 1105.1875, + "completions/mean_terminated_length": 925.727294921875, + "completions/min_length": 689.0, + "completions/min_terminated_length": 689.0, + "epoch": 0.913728432108027, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.083334324506, + "kl": 0.019775390625, + "learning_rate": 1.2036886859839459e-07, + "loss": 0.0251, + "num_tokens": 165483920.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0390379428863525, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011545753631720938, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09884119422294856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3654 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1068.5625, + "completions/mean_terminated_length": 1068.5625, + "completions/min_length": 658.0, + "completions/min_terminated_length": 658.0, + "epoch": 0.9139784946236559, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.933617726445086, + "kl": 0.021148681640625, + "learning_rate": 1.2025218954421002e-07, + "loss": -0.0497, + "num_tokens": 165529281.0, + "reward": 0.0, + "reward_std": 0.6990038156509399, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08933799165942986, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1751672140997969, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886446, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3655 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1171.0, + "completions/mean_length": 1000.125, + "completions/mean_terminated_length": 966.800048828125, + "completions/min_length": 760.0, + "completions/min_terminated_length": 760.0, + "epoch": 0.9142285571392849, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5951525346298827, + "kl": 0.010406494140625, + "learning_rate": 1.20135837943571e-07, + "loss": 0.0011, + "num_tokens": 165557947.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0578759908676147, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06830479339333356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.063409340903508, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3656 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1342.0, + "completions/max_terminated_length": 1342.0, + "completions/mean_length": 986.625, + "completions/mean_terminated_length": 986.625, + "completions/min_length": 698.0, + "completions/min_terminated_length": 698.0, + "epoch": 0.9144786196549137, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3307946157610147, + "kl": 0.0191497802734375, + "learning_rate": 1.2001981388513354e-07, + "loss": -0.0178, + "num_tokens": 165605981.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9865949153900146, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019654004849219305, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07772984757870213, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3657 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1239.4375, + "completions/mean_terminated_length": 1121.0, + "completions/min_length": 699.0, + "completions/min_terminated_length": 699.0, + "epoch": 0.9147286821705426, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.295689935021245, + "kl": 0.016815185546875, + "learning_rate": 1.1990411745730418e-07, + "loss": -0.0301, + "num_tokens": 165661844.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0284578800201416, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.37973275835719655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10209269500970017, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14801151106386087, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3658 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1280.0, + "completions/mean_length": 962.25, + "completions/mean_terminated_length": 926.4000244140625, + "completions/min_length": 498.0, + "completions/min_terminated_length": 498.0, + "epoch": 0.9149787446861716, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9504753329417657, + "kl": 0.0247802734375, + "learning_rate": 1.1978874874823966e-07, + "loss": -0.0176, + "num_tokens": 165700208.0, + "reward": 0.0, + "reward_std": 0.9438822865486145, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.011307018236355762, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09944233383493724, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13443985299781488, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3659 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1084.875, + "completions/mean_terminated_length": 1084.875, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.9152288072018004, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.546383412654599, + "kl": 0.025421142578125, + "learning_rate": 1.1967370784584722e-07, + "loss": -0.0477, + "num_tokens": 165742998.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9071033000946045, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.026741083306709705, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07730578002374457, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3660 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1300.0, + "completions/max_terminated_length": 1300.0, + "completions/mean_length": 959.75, + "completions/mean_terminated_length": 959.75, + "completions/min_length": 683.0, + "completions/min_terminated_length": 683.0, + "epoch": 0.9154788697174293, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.667195286626399, + "kl": 0.020965576171875, + "learning_rate": 1.1955899483778414e-07, + "loss": -0.051, + "num_tokens": 165793346.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.969125509262085, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.046955619608878486, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07108279048435734, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3661 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1134.0, + "completions/mean_length": 1043.0, + "completions/mean_terminated_length": 937.5385131835938, + "completions/min_length": 776.0, + "completions/min_terminated_length": 776.0, + "epoch": 0.9157289322330583, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4466550788508763, + "kl": 0.021575927734375, + "learning_rate": 1.1944460981145786e-07, + "loss": -0.0152, + "num_tokens": 165837002.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5857294797897339, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04464122573681901, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03152698263979651, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3662 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1329.1875, + "completions/mean_terminated_length": 1226.7000732421875, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.9159789947486872, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0389118927808765, + "kl": 0.018524169921875, + "learning_rate": 1.193305528540261e-07, + "loss": -0.0081, + "num_tokens": 165893117.0, + "reward": 0.0, + "reward_std": 0.71659255027771, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009517181802962781, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.023031638982865997, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07685966046898342, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3663 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1201.5625, + "completions/mean_terminated_length": 1132.6923828125, + "completions/min_length": 672.0, + "completions/min_terminated_length": 672.0, + "epoch": 0.916229057264316, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1426520877532407, + "kl": 0.0264892578125, + "learning_rate": 1.1921682405239642e-07, + "loss": -0.0245, + "num_tokens": 165941974.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.068223476409912, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13691096299449798, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18655484860236776, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09269623828717427, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3664 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1413.0625, + "completions/mean_terminated_length": 1326.125, + "completions/min_length": 1272.0, + "completions/min_terminated_length": 1272.0, + "epoch": 0.916479119779945, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.9755503408588944, + "kl": 0.008575439453125, + "learning_rate": 1.1910342349322628e-07, + "loss": -0.0014, + "num_tokens": 165993495.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.859343409538269, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.033964755234913456, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.058818089296163646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07876359377087683, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3665 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1476.0, + "completions/mean_length": 1455.8125, + "completions/mean_terminated_length": 1421.4444580078125, + "completions/min_length": 1292.0, + "completions/min_terminated_length": 1292.0, + "epoch": 0.9167291822955739, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1645696952881406, + "kl": 0.02557373046875, + "learning_rate": 1.189903512629232e-07, + "loss": -0.0026, + "num_tokens": 166048196.0, + "reward": 0.0, + "reward_std": 0.9250848293304443, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014082842748634649, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07334284338450847, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11666666666666667, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3666 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1331.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1057.0625, + "completions/mean_terminated_length": 1057.0625, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.9169792448112029, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0038469341280662, + "kl": 0.0148773193359375, + "learning_rate": 1.1887760744764461e-07, + "loss": -0.0298, + "num_tokens": 166092533.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7504178285598755, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009824053092160903, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057052996018908585, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17638342073763938, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3667 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1173.5, + "completions/mean_terminated_length": 1126.857177734375, + "completions/min_length": 778.0, + "completions/min_terminated_length": 778.0, + "epoch": 0.9172293073268317, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.843996961562941, + "kl": 0.015533447265625, + "learning_rate": 1.187651921332974e-07, + "loss": -0.0518, + "num_tokens": 166139917.0, + "reward": 0.0, + "reward_std": 0.6764519810676575, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020786376155859727, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02193883529140396, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3668 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1190.0, + "completions/max_terminated_length": 1190.0, + "completions/mean_length": 829.6875, + "completions/mean_terminated_length": 829.6875, + "completions/min_length": 535.0, + "completions/min_terminated_length": 535.0, + "epoch": 0.9174793698424606, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.24682340097439, + "kl": 0.02105712890625, + "learning_rate": 1.186531054055382e-07, + "loss": -0.1094, + "num_tokens": 166179728.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9328632950782776, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.15751357530655943, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17859156496104278, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0906764700582363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3669 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1353.0, + "completions/max_terminated_length": 1353.0, + "completions/mean_length": 965.875, + "completions/mean_terminated_length": 965.875, + "completions/min_length": 537.0, + "completions/min_terminated_length": 537.0, + "epoch": 0.9177294323580896, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.956090886330282, + "kl": 0.0189056396484375, + "learning_rate": 1.1854134734977362e-07, + "loss": -0.1183, + "num_tokens": 166209662.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6649502515792847, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09382092424074559, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0887637923743045, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3670 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1086.0625, + "completions/mean_terminated_length": 1026.9285888671875, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.9179794948737184, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2413064616695673, + "kl": 0.02081298828125, + "learning_rate": 1.1842991805115943e-07, + "loss": -0.0038, + "num_tokens": 166247367.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0466474294662476, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02519493966525234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11141416732891947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3671 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1070.0, + "completions/max_terminated_length": 1070.0, + "completions/mean_length": 868.8125, + "completions/mean_terminated_length": 868.8125, + "completions/min_length": 647.0, + "completions/min_terminated_length": 647.0, + "epoch": 0.9182295573893473, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.052918000587236, + "kl": 0.0204925537109375, + "learning_rate": 1.1831881759460102e-07, + "loss": -0.0328, + "num_tokens": 166291212.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9026131629943848, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.13524684936401407, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1496331025152014, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14291929864761418, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3672 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1224.0, + "completions/mean_length": 1482.75, + "completions/mean_terminated_length": 1224.0, + "completions/min_length": 1224.0, + "completions/min_terminated_length": 1224.0, + "epoch": 0.9184796199049763, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.895701525654411, + "kl": 0.0181732177734375, + "learning_rate": 1.1820804606475338e-07, + "loss": -0.0035, + "num_tokens": 166347240.0, + "reward": 0.0, + "reward_std": 0.6414437294006348, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021349836838124563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08978910756320248, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16947631758514883, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3673 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1359.0, + "completions/mean_length": 1230.5625, + "completions/mean_terminated_length": 1108.0909423828125, + "completions/min_length": 892.0, + "completions/min_terminated_length": 892.0, + "epoch": 0.9187296824206052, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9759454867578556, + "kl": 0.01904296875, + "learning_rate": 1.180976035460206e-07, + "loss": 0.0805, + "num_tokens": 166399385.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.045499563217163, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007565439447408341, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.016824428657265803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101764, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3674 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1289.0, + "completions/max_terminated_length": 1289.0, + "completions/mean_length": 1150.5625, + "completions/mean_terminated_length": 1150.5625, + "completions/min_length": 915.0, + "completions/min_terminated_length": 915.0, + "epoch": 0.918979744936234, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.614783556922729, + "kl": 0.0167236328125, + "learning_rate": 1.1798749012255632e-07, + "loss": -0.0182, + "num_tokens": 166441578.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8135178089141846, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08980085561530321, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1726390543546756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10318986456114838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3675 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1182.0, + "completions/max_terminated_length": 1182.0, + "completions/mean_length": 1019.9375, + "completions/mean_terminated_length": 1019.9375, + "completions/min_length": 851.0, + "completions/min_terminated_length": 851.0, + "epoch": 0.919229807451863, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.778686337269156, + "kl": 0.015655517578125, + "learning_rate": 1.1787770587826315e-07, + "loss": 0.0196, + "num_tokens": 166480673.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.34620043635368347, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020455639132508106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0978963535010105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12531441937663723, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3676 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1346.625, + "completions/mean_terminated_length": 1295.5, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.9194798699674919, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1367189207537924, + "kl": 0.021575927734375, + "learning_rate": 1.1776825089679324e-07, + "loss": -0.0232, + "num_tokens": 166521891.0, + "reward": 0.0, + "reward_std": 0.9211207032203674, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03138988822012761, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05572121183203751, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3677 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1309.0, + "completions/max_terminated_length": 1309.0, + "completions/mean_length": 1136.4375, + "completions/mean_terminated_length": 1136.4375, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.9197299324831207, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2215799764730133, + "kl": 0.021697998046875, + "learning_rate": 1.1765912526154752e-07, + "loss": 0.0481, + "num_tokens": 166562338.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.048806071281433, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.24584124067019048, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2251008216630915, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11417984514369006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3678 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1228.0, + "completions/mean_terminated_length": 1189.1429443359375, + "completions/min_length": 864.0, + "completions/min_terminated_length": 864.0, + "epoch": 0.9199799949987497, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.504142014124764, + "kl": 0.0125274658203125, + "learning_rate": 1.1755032905567612e-07, + "loss": -0.0235, + "num_tokens": 166622506.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0465307235717773, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00014653291919973586, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08457826789418427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3679 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1272.0, + "completions/max_terminated_length": 1272.0, + "completions/mean_length": 1006.875, + "completions/mean_terminated_length": 1006.875, + "completions/min_length": 780.0, + "completions/min_terminated_length": 780.0, + "epoch": 0.9202300575143786, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9337755872664153, + "kl": 0.02880859375, + "learning_rate": 1.1744186236207815e-07, + "loss": 0.0007, + "num_tokens": 166666504.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8192830681800842, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11577778025658722, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12163667161299514, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3680 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1206.125, + "completions/mean_terminated_length": 1186.533447265625, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.9204801200300075, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2457984259974944, + "kl": 0.023101806640625, + "learning_rate": 1.1733372526340158e-07, + "loss": 0.0275, + "num_tokens": 166712618.0, + "reward": 0.0, + "reward_std": 0.5258550643920898, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017611437283145563, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04531597114359553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3681 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1261.3125, + "completions/mean_terminated_length": 1206.2308349609375, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.9207301825456364, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9608639482078907, + "kl": 0.016632080078125, + "learning_rate": 1.1722591784204336e-07, + "loss": 0.0459, + "num_tokens": 166752167.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9382869005203247, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02657541761950207, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17701708598800076, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346313, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3682 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1380.0, + "completions/mean_length": 1218.125, + "completions/mean_terminated_length": 1090.0, + "completions/min_length": 890.0, + "completions/min_terminated_length": 890.0, + "epoch": 0.9209802450612653, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6223416861418145, + "kl": 0.018890380859375, + "learning_rate": 1.1711844018014902e-07, + "loss": 0.0148, + "num_tokens": 166812593.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0462459325790405, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04479768450040108, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13266311651880697, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215289, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3683 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1127.0, + "completions/max_terminated_length": 1127.0, + "completions/mean_length": 995.5625, + "completions/mean_terminated_length": 995.5625, + "completions/min_length": 794.0, + "completions/min_terminated_length": 794.0, + "epoch": 0.9212303075768942, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.669315029539074, + "kl": 0.01318359375, + "learning_rate": 1.1701129235961311e-07, + "loss": -0.0154, + "num_tokens": 166851930.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9633774757385254, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011162130287810042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08380648066109268, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3684 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1315.0, + "completions/max_terminated_length": 1315.0, + "completions/mean_length": 957.75, + "completions/mean_terminated_length": 957.75, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.9214803700925231, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.331535322403649, + "kl": 0.0264892578125, + "learning_rate": 1.169044744620787e-07, + "loss": -0.0254, + "num_tokens": 166893846.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0149259567260742, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019294443883018084, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0950007262523985, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460886, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3685 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1374.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 921.5, + "completions/mean_terminated_length": 921.5, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.921730432608152, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2479465530176608, + "kl": 0.02154541015625, + "learning_rate": 1.1679798656893742e-07, + "loss": 0.0118, + "num_tokens": 166944670.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0569757223129272, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0117741943687278, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04123346943966635, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3686 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1450.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1130.1875, + "completions/mean_terminated_length": 1130.1875, + "completions/min_length": 830.0, + "completions/min_terminated_length": 830.0, + "epoch": 0.921980495123781, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8306733240606268, + "kl": 0.02117919921875, + "learning_rate": 1.1669182876132949e-07, + "loss": -0.033, + "num_tokens": 166987793.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8796590566635132, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01968715161280749, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06739216862442296, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06382847385042252, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3687 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1423.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1124.0, + "completions/mean_terminated_length": 1124.0, + "completions/min_length": 858.0, + "completions/min_terminated_length": 858.0, + "epoch": 0.9222305576394099, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.975101672423889, + "kl": 0.01380157470703125, + "learning_rate": 1.1658600112014368e-07, + "loss": -0.0178, + "num_tokens": 167030457.0, + "reward": 1.1175870895385742e-08, + "reward_std": 0.9482818841934204, + "rewards/wordcountpos_reward_GEOBench/mean": 1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08898790407699046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0864153823665012, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3688 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1254.0625, + "completions/mean_terminated_length": 1254.0625, + "completions/min_length": 1066.0, + "completions/min_terminated_length": 1066.0, + "epoch": 0.9224806201550387, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.077910709407568, + "kl": 0.02410888671875, + "learning_rate": 1.164805037260171e-07, + "loss": 0.0016, + "num_tokens": 167080378.0, + "reward": 0.0, + "reward_std": 0.8884323239326477, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04603644347376898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12772213312309055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3689 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1434.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 950.9375, + "completions/mean_terminated_length": 950.9375, + "completions/min_length": 496.0, + "completions/min_terminated_length": 496.0, + "epoch": 0.9227306826706677, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.85390803124126, + "kl": 0.0139312744140625, + "learning_rate": 1.1637533665933522e-07, + "loss": -0.0238, + "num_tokens": 167110753.0, + "reward": 0.0, + "reward_std": 1.02589750289917, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03341112113618596, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07028939343230597, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3690 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1174.0, + "completions/mean_terminated_length": 1152.2667236328125, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.9229807451862966, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5026658296095317, + "kl": 0.0144195556640625, + "learning_rate": 1.1627050000023199e-07, + "loss": -0.0857, + "num_tokens": 167165785.0, + "reward": 0.0, + "reward_std": 0.9063175916671753, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04431496298501416, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06972499508012106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3691 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1412.0, + "completions/mean_length": 1183.0625, + "completions/mean_terminated_length": 1161.933349609375, + "completions/min_length": 917.0, + "completions/min_terminated_length": 917.0, + "epoch": 0.9232308077019254, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.0811727325076985, + "kl": 0.0154876708984375, + "learning_rate": 1.1616599382858936e-07, + "loss": 0.0371, + "num_tokens": 167202946.0, + "reward": 0.0, + "reward_std": 1.0078431367874146, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002522716824938962, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06532044927400216, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3692 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1311.0, + "completions/mean_length": 1105.75, + "completions/mean_terminated_length": 1049.4285888671875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.9234808702175544, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.472817751820018, + "kl": 0.0175628662109375, + "learning_rate": 1.1606181822403753e-07, + "loss": -0.0607, + "num_tokens": 167257462.0, + "reward": 0.0, + "reward_std": 1.058694839477539, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06444914979041289, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15879541340280515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3693 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1216.0, + "completions/mean_length": 1122.25, + "completions/mean_terminated_length": 996.3333740234375, + "completions/min_length": 607.0, + "completions/min_terminated_length": 607.0, + "epoch": 0.9237309327331833, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2667491540126776, + "kl": 0.01531982421875, + "learning_rate": 1.1595797326595485e-07, + "loss": -0.0349, + "num_tokens": 167291130.0, + "reward": 0.0, + "reward_std": 0.7666068077087402, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009392406661667707, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.019434783544758652, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3694 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1199.5, + "completions/mean_terminated_length": 1130.1539306640625, + "completions/min_length": 821.0, + "completions/min_terminated_length": 821.0, + "epoch": 0.9239809952488122, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3976624786399188, + "kl": 0.022613525390625, + "learning_rate": 1.1585445903346783e-07, + "loss": 0.0606, + "num_tokens": 167338298.0, + "reward": 0.0, + "reward_std": 0.7824001312255859, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.043498827686845994, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09176494532437252, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.133263870794973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3695 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1176.375, + "completions/mean_terminated_length": 1101.6923828125, + "completions/min_length": 729.0, + "completions/min_terminated_length": 729.0, + "epoch": 0.9242310577644411, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1504112683764287, + "kl": 0.014312744140625, + "learning_rate": 1.1575127560545078e-07, + "loss": 0.0337, + "num_tokens": 167380264.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8989863395690918, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.049686545446026936, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06538249030468826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3696 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1500.0, + "completions/mean_length": 1317.8125, + "completions/mean_terminated_length": 1275.769287109375, + "completions/min_length": 1025.0, + "completions/min_terminated_length": 1025.0, + "epoch": 0.92448112028007, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8081692525288835, + "kl": 0.0153045654296875, + "learning_rate": 1.1564842306052605e-07, + "loss": -0.0302, + "num_tokens": 167422525.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9955623745918274, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05499473222217102, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12823056820017184, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18539247657434854, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3697 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1252.0625, + "completions/mean_terminated_length": 1103.300048828125, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.9247311827956989, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.367780209186047, + "kl": 0.01995849609375, + "learning_rate": 1.1554590147706392e-07, + "loss": 0.0127, + "num_tokens": 167455806.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0361104011535645, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04201290408791498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11954296018850936, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3698 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1317.0, + "completions/mean_length": 1035.6875, + "completions/mean_terminated_length": 1004.7333984375, + "completions/min_length": 733.0, + "completions/min_terminated_length": 733.0, + "epoch": 0.9249812453113279, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9102563303118156, + "kl": 0.025909423828125, + "learning_rate": 1.1544371093318238e-07, + "loss": -0.0484, + "num_tokens": 167498697.0, + "reward": 0.0, + "reward_std": 0.8374572992324829, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11105867315075595, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06588625300016976, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829065, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3699 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1205.4375, + "completions/mean_terminated_length": 1185.800048828125, + "completions/min_length": 902.0, + "completions/min_terminated_length": 902.0, + "epoch": 0.9252313078269567, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.296944332087222, + "kl": 0.019744873046875, + "learning_rate": 1.1534185150674724e-07, + "loss": -0.0397, + "num_tokens": 167550328.0, + "reward": 0.0, + "reward_std": 1.0235538482666016, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02670051545653741, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052485238323633276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12382783747337808, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3700 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1040.3125, + "completions/mean_terminated_length": 1009.666748046875, + "completions/min_length": 711.0, + "completions/min_terminated_length": 711.0, + "epoch": 0.9254813703425856, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.008064001061431, + "kl": 0.020050048828125, + "learning_rate": 1.1524032327537189e-07, + "loss": -0.0373, + "num_tokens": 167591205.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7322176694869995, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08300715030783865, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11393710493941661, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3701 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1212.0, + "completions/max_terminated_length": 1212.0, + "completions/mean_length": 983.1875, + "completions/mean_terminated_length": 983.1875, + "completions/min_length": 741.0, + "completions/min_terminated_length": 741.0, + "epoch": 0.9257314328582146, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.957791970361254, + "kl": 0.0140533447265625, + "learning_rate": 1.1513912631641768e-07, + "loss": -0.0345, + "num_tokens": 167627776.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.061094880104065, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013546931689707437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09281789952175556, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116195, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3702 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1306.9375, + "completions/mean_terminated_length": 1262.3846435546875, + "completions/min_length": 1023.0, + "completions/min_terminated_length": 1023.0, + "epoch": 0.9259814953738434, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2866607808289046, + "kl": 0.00909423828125, + "learning_rate": 1.1503826070699325e-07, + "loss": -0.0062, + "num_tokens": 167679695.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0034544467926025, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0032694061658154194, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08856487343901384, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3703 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1262.0, + "completions/max_terminated_length": 1262.0, + "completions/mean_length": 1042.75, + "completions/mean_terminated_length": 1042.75, + "completions/min_length": 843.0, + "completions/min_terminated_length": 843.0, + "epoch": 0.9262315578894724, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.999992433341095, + "kl": 0.021759033203125, + "learning_rate": 1.1493772652395468e-07, + "loss": -0.0236, + "num_tokens": 167711363.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0209553241729736, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.013736270699684361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04023809951816783, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575907, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3704 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1210.625, + "completions/mean_terminated_length": 1114.166748046875, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.9264816204051013, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.093794604729201, + "kl": 0.0206298828125, + "learning_rate": 1.1483752384390582e-07, + "loss": 0.0558, + "num_tokens": 167754221.0, + "reward": 0.0, + "reward_std": 0.7159785032272339, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.012710912092848314, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0683534653583931, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11917929226045819, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3705 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1181.4375, + "completions/mean_terminated_length": 933.6666870117188, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.9267316829207302, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2516175017813818, + "kl": 0.0164337158203125, + "learning_rate": 1.1473765274319786e-07, + "loss": -0.0308, + "num_tokens": 167804076.0, + "reward": 0.0, + "reward_std": 0.9640519618988037, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06519588001676221, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08238359966314572, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3706 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1484.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1150.625, + "completions/mean_terminated_length": 1150.625, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.9269817454363591, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3201367058190585, + "kl": 0.02081298828125, + "learning_rate": 1.146381132979291e-07, + "loss": 0.0005, + "num_tokens": 167849294.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9468681812286377, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01304214802578947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.036317122528147, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3707 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1234.0, + "completions/max_terminated_length": 1234.0, + "completions/mean_length": 972.125, + "completions/mean_terminated_length": 972.125, + "completions/min_length": 783.0, + "completions/min_terminated_length": 783.0, + "epoch": 0.927231807951988, + "frac_reward_zero_std": 0.0, + "grad_norm": 5.959086598764745, + "kl": 0.033203125, + "learning_rate": 1.145389055839453e-07, + "loss": 0.0152, + "num_tokens": 167883696.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0413585901260376, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01489147810072024, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1362894018590004, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901162, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3708 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 1305.5, + "completions/mean_terminated_length": 1217.0909423828125, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.9274818704676169, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.525358058156034, + "kl": 0.017181396484375, + "learning_rate": 1.1444002967683961e-07, + "loss": -0.0324, + "num_tokens": 167940736.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0090341567993164, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05655878529289674, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12346577053901824, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.102469507659596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3709 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1439.0, + "completions/mean_length": 1338.5625, + "completions/mean_terminated_length": 1241.7000732421875, + "completions/min_length": 984.0, + "completions/min_terminated_length": 984.0, + "epoch": 0.9277319329832459, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.663510102545798, + "kl": 0.03155517578125, + "learning_rate": 1.1434148565195204e-07, + "loss": -0.0232, + "num_tokens": 168002481.0, + "reward": 0.0, + "reward_std": 1.0652945041656494, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03447230969174992, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08340242134864712, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13709958532503408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3710 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1384.25, + "completions/mean_terminated_length": 1235.4285888671875, + "completions/min_length": 1087.0, + "completions/min_terminated_length": 1087.0, + "epoch": 0.9279819954988747, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.980456351829907, + "kl": 0.011016845703125, + "learning_rate": 1.1424327358436979e-07, + "loss": -0.0202, + "num_tokens": 168061197.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7928066253662109, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09307709770905287, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11309887740569939, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3711 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1268.4375, + "completions/mean_terminated_length": 1235.357177734375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.9282320580145036, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.869184880268294, + "kl": 0.0153961181640625, + "learning_rate": 1.1414539354892734e-07, + "loss": -0.0071, + "num_tokens": 168110196.0, + "reward": 0.0, + "reward_std": 0.7408984899520874, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14190117564659246, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24480623972331175, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3712 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1372.0, + "completions/mean_length": 1298.5, + "completions/mean_terminated_length": 1097.0, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.9284821205301326, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.206596304170365, + "kl": 0.019500732421875, + "learning_rate": 1.14047845620206e-07, + "loss": -0.0459, + "num_tokens": 168156460.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8133715391159058, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.16164104382340966, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059324852720729786, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11287488977066931, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3713 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1206.0625, + "completions/mean_terminated_length": 1186.4666748046875, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.9287321830457614, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.496590533073252, + "kl": 0.0247802734375, + "learning_rate": 1.13950629872534e-07, + "loss": -0.0378, + "num_tokens": 168203949.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9511152505874634, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021509539226702522, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0393626860730906, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202955, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3714 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1283.9375, + "completions/mean_terminated_length": 1211.916748046875, + "completions/min_length": 732.0, + "completions/min_terminated_length": 732.0, + "epoch": 0.9289822455613903, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.488844156860945, + "kl": 0.0114898681640625, + "learning_rate": 1.1385374637998654e-07, + "loss": -0.0318, + "num_tokens": 168248908.0, + "reward": 0.0, + "reward_std": 0.8914376497268677, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2758068788410624, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.5756107239577221, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.060705726131767716, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3715 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1264.8125, + "completions/mean_terminated_length": 1210.5384521484375, + "completions/min_length": 1012.0, + "completions/min_terminated_length": 1012.0, + "epoch": 0.9292323080770193, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.538710206334408, + "kl": 0.01494598388671875, + "learning_rate": 1.137571952163856e-07, + "loss": -0.0004, + "num_tokens": 168293017.0, + "reward": 0.0, + "reward_std": 0.9002714157104492, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02098093648033003, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08852600238439304, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3716 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1285.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1134.8125, + "completions/mean_terminated_length": 1134.8125, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.9294823705926482, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7842666803118394, + "kl": 0.0218505859375, + "learning_rate": 1.1366097645529999e-07, + "loss": -0.0054, + "num_tokens": 168338542.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9584531784057617, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09862667415268088, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11114464403711355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09179284245476839, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3717 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1250.8125, + "completions/mean_terminated_length": 1234.2000732421875, + "completions/min_length": 912.0, + "completions/min_terminated_length": 912.0, + "epoch": 0.929732433108277, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.174515298372483, + "kl": 0.0184173583984375, + "learning_rate": 1.1356509017004506e-07, + "loss": 0.0047, + "num_tokens": 168391011.0, + "reward": 0.0, + "reward_std": 0.6713659763336182, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1365881227280565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24280307385117603, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17421782874278927, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3718 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1379.3125, + "completions/mean_terminated_length": 1224.1429443359375, + "completions/min_length": 1021.0, + "completions/min_terminated_length": 1021.0, + "epoch": 0.929982495623906, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.862664198376647, + "kl": 0.019378662109375, + "learning_rate": 1.1346953643368323e-07, + "loss": -0.0307, + "num_tokens": 168446856.0, + "reward": 0.0, + "reward_std": 0.5578821301460266, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03744641368021573, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06264953628207848, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3719 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1266.8125, + "completions/mean_terminated_length": 1266.8125, + "completions/min_length": 1130.0, + "completions/min_terminated_length": 1130.0, + "epoch": 0.9302325581395349, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.742147149651199, + "kl": 0.016448974609375, + "learning_rate": 1.1337431531902309e-07, + "loss": 0.0087, + "num_tokens": 168496381.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7464860677719116, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0035606811283251903, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06501226381848355, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3720 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1300.375, + "completions/mean_terminated_length": 1233.8333740234375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.9304826206551637, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.339130760790173, + "kl": 0.022430419921875, + "learning_rate": 1.1327942689862006e-07, + "loss": -0.0299, + "num_tokens": 168549883.0, + "reward": 0.0, + "reward_std": 0.6953235864639282, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09905348041410841, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10128766564553646, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619462, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3721 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1473.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1184.3125, + "completions/mean_terminated_length": 1184.3125, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.9307326831707927, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.189893126237647, + "kl": 0.0168914794921875, + "learning_rate": 1.131848712447758e-07, + "loss": 0.022, + "num_tokens": 168587440.0, + "reward": 2.2351741790771484e-08, + "reward_std": 0.9741250276565552, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.007294315678352813, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10538052725017402, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05900408021045226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3722 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1415.4375, + "completions/mean_terminated_length": 1229.4000244140625, + "completions/min_length": 952.0, + "completions/min_terminated_length": 952.0, + "epoch": 0.9309827456864216, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.70577914499038, + "kl": 0.020416259765625, + "learning_rate": 1.1309064842953873e-07, + "loss": -0.0279, + "num_tokens": 168650295.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0387792587280273, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04406661412149763, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07052332557117479, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07490735018081413, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3723 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1447.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1003.9375, + "completions/mean_terminated_length": 1003.9375, + "completions/min_length": 696.0, + "completions/min_terminated_length": 696.0, + "epoch": 0.9312328082020506, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8234379261998144, + "kl": 0.0213623046875, + "learning_rate": 1.129967585247035e-07, + "loss": -0.0854, + "num_tokens": 168691078.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9566839337348938, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019379760917478034, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.025289924591592645, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3724 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1434.0, + "completions/mean_length": 1260.6875, + "completions/mean_terminated_length": 1226.5, + "completions/min_length": 1053.0, + "completions/min_terminated_length": 1053.0, + "epoch": 0.9314828707176794, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1210195600159465, + "kl": 0.0175323486328125, + "learning_rate": 1.12903201601811e-07, + "loss": -0.0257, + "num_tokens": 168741665.0, + "reward": 0.0, + "reward_std": 0.9274099469184875, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.023292883379914055, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09582579050413584, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14089659985908765, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3725 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1267.0, + "completions/mean_length": 1222.6875, + "completions/mean_terminated_length": 1007.0, + "completions/min_length": 578.0, + "completions/min_terminated_length": 578.0, + "epoch": 0.9317329332333083, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8740191253613294, + "kl": 0.0198974609375, + "learning_rate": 1.1280997773214862e-07, + "loss": -0.03, + "num_tokens": 168784044.0, + "reward": 0.0, + "reward_std": 0.8374345898628235, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.032701642756742526, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0781125081308063, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09496588081262936, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3726 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1455.0, + "completions/mean_length": 1351.25, + "completions/mean_terminated_length": 1235.5555419921875, + "completions/min_length": 1018.0, + "completions/min_terminated_length": 1018.0, + "epoch": 0.9319829957489373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.486204460729253, + "kl": 0.0146942138671875, + "learning_rate": 1.1271708698674978e-07, + "loss": 0.0051, + "num_tokens": 168841976.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7415708899497986, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08504731849606924, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14671501199648515, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363346, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3727 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1018.3125, + "completions/mean_terminated_length": 986.2000732421875, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.9322330582645662, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5577673579215756, + "kl": 0.0181732177734375, + "learning_rate": 1.126245294363941e-07, + "loss": -0.0616, + "num_tokens": 168876117.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0117090940475464, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03115273876545437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08199763801411568, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3728 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1180.8125, + "completions/mean_terminated_length": 1135.21435546875, + "completions/min_length": 829.0, + "completions/min_terminated_length": 829.0, + "epoch": 0.932483120780195, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.149910779904981, + "kl": 0.0152587890625, + "learning_rate": 1.1253230515160734e-07, + "loss": 0.0023, + "num_tokens": 168922978.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9817203879356384, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00342808229703628, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09002074222332865, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10602235962635781, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3729 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1392.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1164.4375, + "completions/mean_terminated_length": 1164.4375, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.932733183295824, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.488939522166382, + "kl": 0.017669677734375, + "learning_rate": 1.1244041420266149e-07, + "loss": 0.0031, + "num_tokens": 168965697.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9931528568267822, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040079577404445565, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.043065292890433086, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3730 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1116.25, + "completions/mean_terminated_length": 1061.4285888671875, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.9329832458114529, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8937204961611576, + "kl": 0.01611328125, + "learning_rate": 1.1234885665957424e-07, + "loss": 0.0281, + "num_tokens": 169016517.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8816565275192261, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006794891882347925, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12199405215380137, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0859586463881842, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3731 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1459.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1140.625, + "completions/mean_terminated_length": 1140.625, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.9332333083270817, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1207305601210593, + "kl": 0.019317626953125, + "learning_rate": 1.122576325921095e-07, + "loss": 0.0408, + "num_tokens": 169060655.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8805487155914307, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.14260461885723316, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19071515421801727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10671873729054747, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3732 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1261.6875, + "completions/mean_terminated_length": 1182.25, + "completions/min_length": 877.0, + "completions/min_terminated_length": 877.0, + "epoch": 0.9334833708427107, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4111375510467448, + "kl": 0.022674560546875, + "learning_rate": 1.1216674206977689e-07, + "loss": -0.0671, + "num_tokens": 169116866.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0373167991638184, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04773311478157664, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07631920859351922, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0909822937597079, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3733 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1453.0, + "completions/mean_length": 1218.625, + "completions/mean_terminated_length": 1153.6923828125, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.9337334333583396, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4451221347064784, + "kl": 0.021484375, + "learning_rate": 1.1207618516183209e-07, + "loss": -0.0134, + "num_tokens": 169163300.0, + "reward": 3.725290298461914e-09, + "reward_std": 0.9931322932243347, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07904659435666805, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.098038873980686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252809, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3734 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1377.0, + "completions/mean_length": 1164.125, + "completions/mean_terminated_length": 1141.7333984375, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.9339834958739685, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5711985356582017, + "kl": 0.014404296875, + "learning_rate": 1.1198596193727631e-07, + "loss": -0.001, + "num_tokens": 169214174.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.765924334526062, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.059112013013092046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08404646279367393, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3735 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1348.0, + "completions/max_terminated_length": 1348.0, + "completions/mean_length": 1069.625, + "completions/mean_terminated_length": 1069.625, + "completions/min_length": 822.0, + "completions/min_terminated_length": 822.0, + "epoch": 0.9342335583895974, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.172188370332883, + "kl": 0.01953125, + "learning_rate": 1.1189607246485672e-07, + "loss": -0.0013, + "num_tokens": 169262488.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.019342303276062, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.036775452688261066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05784257617353264, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3736 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1441.0, + "completions/mean_length": 1418.0, + "completions/mean_terminated_length": 1281.3333740234375, + "completions/min_length": 1074.0, + "completions/min_terminated_length": 1074.0, + "epoch": 0.9344836209052263, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.668684848684593, + "kl": 0.02154541015625, + "learning_rate": 1.118065168130661e-07, + "loss": -0.0341, + "num_tokens": 169312752.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9535478353500366, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0044470183720703785, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08202775154768775, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316811, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3737 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1175.0, + "completions/mean_length": 1033.25, + "completions/mean_terminated_length": 966.5714721679688, + "completions/min_length": 545.0, + "completions/min_terminated_length": 545.0, + "epoch": 0.9347336834208552, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.569821512641045, + "kl": 0.018707275390625, + "learning_rate": 1.1171729505014289e-07, + "loss": 0.0339, + "num_tokens": 169348268.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8009868264198303, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1193132088031719, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07146392771249223, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3738 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1152.5, + "completions/mean_terminated_length": 1129.3333740234375, + "completions/min_length": 899.0, + "completions/min_terminated_length": 899.0, + "epoch": 0.9349837459364841, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9783983028935004, + "kl": 0.013702392578125, + "learning_rate": 1.1162840724407111e-07, + "loss": 0.0214, + "num_tokens": 169391380.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6526873111724854, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0786710469774868, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10366465518778446, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0758897836290186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3739 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1433.0, + "completions/mean_length": 1011.1875, + "completions/mean_terminated_length": 978.6000366210938, + "completions/min_length": 650.0, + "completions/min_terminated_length": 650.0, + "epoch": 0.935233808452113, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5159255144300676, + "kl": 0.0167083740234375, + "learning_rate": 1.1153985346258031e-07, + "loss": -0.0276, + "num_tokens": 169423391.0, + "reward": -1.862645149230957e-08, + "reward_std": 0.9398580193519592, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00792885966502574, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1125446273554091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10174405069512348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3740 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1126.125, + "completions/mean_terminated_length": 1039.84619140625, + "completions/min_length": 775.0, + "completions/min_terminated_length": 775.0, + "epoch": 0.9354838709677419, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.969895817663745, + "kl": 0.0162353515625, + "learning_rate": 1.114516337731455e-07, + "loss": -0.0155, + "num_tokens": 169467329.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5059453845024109, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22124966296782128, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.28061913062905786, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07781745019952499, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3741 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1392.0, + "completions/mean_length": 1343.5625, + "completions/mean_terminated_length": 1221.888916015625, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.9357339334833709, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9206929587426864, + "kl": 0.0143890380859375, + "learning_rate": 1.1136374824298717e-07, + "loss": -0.0176, + "num_tokens": 169519674.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.7059236764907837, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.049344317717004114, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0825666060032947, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7374999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.05146016078626408, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3742 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1469.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1280.8125, + "completions/mean_terminated_length": 1280.8125, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.9359839959989997, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.486819686894327, + "kl": 0.023040771484375, + "learning_rate": 1.1127619693907118e-07, + "loss": 0.0191, + "num_tokens": 169565967.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8846029043197632, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006977152605395977, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15197538902927568, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1387777332977422, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3743 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1162.8125, + "completions/mean_terminated_length": 1162.8125, + "completions/min_length": 681.0, + "completions/min_terminated_length": 681.0, + "epoch": 0.9362340585146287, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.911777606118841, + "kl": 0.02392578125, + "learning_rate": 1.1118897992810876e-07, + "loss": -0.0358, + "num_tokens": 169601972.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0211304426193237, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07726274785723004, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06861200261819, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3744 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1369.0, + "completions/mean_length": 1331.9375, + "completions/mean_terminated_length": 1231.0999755859375, + "completions/min_length": 982.0, + "completions/min_terminated_length": 982.0, + "epoch": 0.9364841210302576, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1474223460556474, + "kl": 0.0117645263671875, + "learning_rate": 1.1110209727655625e-07, + "loss": 0.023, + "num_tokens": 169662547.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.987065851688385, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06475932793313578, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10065804466754494, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3745 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1444.0, + "completions/max_terminated_length": 1444.0, + "completions/mean_length": 1102.3125, + "completions/mean_terminated_length": 1102.3125, + "completions/min_length": 979.0, + "completions/min_terminated_length": 979.0, + "epoch": 0.9367341835458864, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.111485670733276, + "kl": 0.018646240234375, + "learning_rate": 1.1101554905061539e-07, + "loss": -0.0129, + "num_tokens": 169703232.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5867870450019836, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1685538747715983, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12305949338663093, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.056927504255331086, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3746 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1287.375, + "completions/mean_terminated_length": 1190.727294921875, + "completions/min_length": 697.0, + "completions/min_terminated_length": 697.0, + "epoch": 0.9369842460615154, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8959820113526304, + "kl": 0.01739501953125, + "learning_rate": 1.1092933531623313e-07, + "loss": -0.0784, + "num_tokens": 169760358.0, + "reward": 0.0, + "reward_std": 0.9801734089851379, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08357400448967302, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10880865028575952, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3747 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1496.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1082.125, + "completions/mean_terminated_length": 1082.125, + "completions/min_length": 937.0, + "completions/min_terminated_length": 937.0, + "epoch": 0.9372343085771443, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7404199294411953, + "kl": 0.0128021240234375, + "learning_rate": 1.1084345613910139e-07, + "loss": -0.0047, + "num_tokens": 169809728.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9644354581832886, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13827615539146962, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1568646788728784, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3748 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1490.6875, + "completions/mean_terminated_length": 1351.0, + "completions/min_length": 1351.0, + "completions/min_terminated_length": 1351.0, + "epoch": 0.9374843710927732, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.650351304667493, + "kl": 0.025146484375, + "learning_rate": 1.1075791158465729e-07, + "loss": -0.0044, + "num_tokens": 169881579.0, + "reward": 0.0, + "reward_std": 0.88329017162323, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0071670283144553626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.055335396919035634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1261979632400061, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3749 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1320.5625, + "completions/mean_terminated_length": 1260.75, + "completions/min_length": 963.0, + "completions/min_terminated_length": 963.0, + "epoch": 0.9377344336084021, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4084524152863818, + "kl": 0.0149993896484375, + "learning_rate": 1.106727017180829e-07, + "loss": -0.0641, + "num_tokens": 169932180.0, + "reward": 0.0, + "reward_std": 1.0258845090866089, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.028452377920597065, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11658045590004719, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07649739768026005, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3750 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1254.875, + "completions/mean_terminated_length": 1173.166748046875, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.937984496124031, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3618036528527098, + "kl": 0.016632080078125, + "learning_rate": 1.1058782660430536e-07, + "loss": -0.0552, + "num_tokens": 169984082.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0449031591415405, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03920939602645947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09032648346327138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3751 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 1146.125, + "completions/mean_terminated_length": 1095.571533203125, + "completions/min_length": 893.0, + "completions/min_terminated_length": 893.0, + "epoch": 0.9382345586396599, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7229996540073205, + "kl": 0.018890380859375, + "learning_rate": 1.1050328630799662e-07, + "loss": -0.0707, + "num_tokens": 170034980.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7098343372344971, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.046975473535144155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.33386736475225415, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1276569477008451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3752 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1408.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 940.3125, + "completions/mean_terminated_length": 940.3125, + "completions/min_length": 639.0, + "completions/min_terminated_length": 639.0, + "epoch": 0.9384846211552889, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.777234846667137, + "kl": 0.01885986328125, + "learning_rate": 1.1041908089357355e-07, + "loss": 0.0024, + "num_tokens": 170076945.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0004500150680542, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.054237635515501606, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0850704608385363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3753 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1116.75, + "completions/mean_terminated_length": 1028.3077392578125, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.9387346836709177, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9155670670954654, + "kl": 0.0198974609375, + "learning_rate": 1.1033521042519803e-07, + "loss": -0.01, + "num_tokens": 170132877.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9280862808227539, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029181622087024038, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07248242975938413, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10809803506625451, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3754 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1269.25, + "completions/mean_terminated_length": 1164.3636474609375, + "completions/min_length": 802.0, + "completions/min_terminated_length": 802.0, + "epoch": 0.9389847461865466, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5440982617709733, + "kl": 0.017913818359375, + "learning_rate": 1.1025167496677643e-07, + "loss": -0.0385, + "num_tokens": 170182609.0, + "reward": 0.0, + "reward_std": 0.9184513688087463, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10067833038033251, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12555908790577958, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3755 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1428.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1150.4375, + "completions/mean_terminated_length": 1150.4375, + "completions/min_length": 840.0, + "completions/min_terminated_length": 840.0, + "epoch": 0.9392348087021756, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.157317588525578, + "kl": 0.025848388671875, + "learning_rate": 1.1016847458195999e-07, + "loss": 0.0456, + "num_tokens": 170225616.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6481975317001343, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06750410835910484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08871071747234419, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12049281521534187, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3756 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1185.625, + "completions/mean_terminated_length": 1113.076904296875, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.9394848712178044, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.143625641436947, + "kl": 0.00890350341796875, + "learning_rate": 1.1008560933414477e-07, + "loss": -0.0087, + "num_tokens": 170267290.0, + "reward": 0.0, + "reward_std": 0.9062747955322266, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01606597966038691, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.026725292116477235, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.168599898949928, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3757 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1270.9375, + "completions/mean_terminated_length": 1218.0770263671875, + "completions/min_length": 1072.0, + "completions/min_terminated_length": 1072.0, + "epoch": 0.9397349337334333, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.944009488002827, + "kl": 0.01318359375, + "learning_rate": 1.1000307928647118e-07, + "loss": 0.0046, + "num_tokens": 170315049.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8368691205978394, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08525278167301681, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.052104682864225906, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10390522747338701, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3758 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1335.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1016.4375, + "completions/mean_terminated_length": 1016.4375, + "completions/min_length": 785.0, + "completions/min_terminated_length": 785.0, + "epoch": 0.9399849962490623, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3720015260940697, + "kl": 0.0166168212890625, + "learning_rate": 1.0992088450182442e-07, + "loss": 0.0034, + "num_tokens": 170345864.0, + "reward": 0.0, + "reward_std": 0.7619072198867798, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11173780827711068, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08822257770404088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10000000000000002, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3759 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1311.5, + "completions/mean_terminated_length": 1284.571533203125, + "completions/min_length": 980.0, + "completions/min_terminated_length": 980.0, + "epoch": 0.9402350587646912, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1424755214574644, + "kl": 0.04034423828125, + "learning_rate": 1.0983902504283421e-07, + "loss": -0.0457, + "num_tokens": 170393904.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.025816559791565, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.017553796209734122, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1443365741365279, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590962, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3760 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1348.6875, + "completions/mean_terminated_length": 1197.375, + "completions/min_length": 950.0, + "completions/min_terminated_length": 950.0, + "epoch": 0.9404851212803201, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7924898077670157, + "kl": 0.0175628662109375, + "learning_rate": 1.0975750097187475e-07, + "loss": -0.0108, + "num_tokens": 170445891.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9896355867385864, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.003053574650410823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.00905425545510945, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3761 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1287.75, + "completions/mean_terminated_length": 1160.4000244140625, + "completions/min_length": 941.0, + "completions/min_terminated_length": 941.0, + "epoch": 0.940735183795949, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6796210523646184, + "kl": 0.018951416015625, + "learning_rate": 1.0967631235106465e-07, + "loss": 0.0393, + "num_tokens": 170501623.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0224671363830566, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04691437466307361, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.054812576063797656, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3762 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1387.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1034.1875, + "completions/mean_terminated_length": 1034.1875, + "completions/min_length": 165.0, + "completions/min_terminated_length": 165.0, + "epoch": 0.9409852463115779, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.92462550510029, + "kl": 0.028594970703125, + "learning_rate": 1.0959545924226697e-07, + "loss": -0.019, + "num_tokens": 170548778.0, + "reward": 0.0, + "reward_std": 0.8709743022918701, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.002821773649859135, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05396939201460356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1343709624716425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3763 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1362.5, + "completions/mean_terminated_length": 1280.0, + "completions/min_length": 1050.0, + "completions/min_terminated_length": 1050.0, + "epoch": 0.9412353088272069, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.848210926135252, + "kl": 0.020599365234375, + "learning_rate": 1.0951494170708898e-07, + "loss": 0.0118, + "num_tokens": 170590106.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.814010500907898, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1265401904069634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13401744836939347, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3764 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1343.0, + "completions/mean_length": 1339.1875, + "completions/mean_terminated_length": 1178.375, + "completions/min_length": 1093.0, + "completions/min_terminated_length": 1093.0, + "epoch": 0.9414853713428357, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2375961369344672, + "kl": 0.0154571533203125, + "learning_rate": 1.0943475980688246e-07, + "loss": -0.0334, + "num_tokens": 170640501.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.5831708908081055, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03425020305914779, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03670782324455935, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3765 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1450.0, + "completions/mean_length": 1389.0, + "completions/mean_terminated_length": 1302.6666259765625, + "completions/min_length": 1192.0, + "completions/min_terminated_length": 1192.0, + "epoch": 0.9417354338584646, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9242339423092045, + "kl": 0.0194091796875, + "learning_rate": 1.093549136027434e-07, + "loss": -0.0064, + "num_tokens": 170695301.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0559093952178955, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021626030329372027, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07619873677937226, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13957607775504186, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3766 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1031.0, + "completions/max_terminated_length": 1031.0, + "completions/mean_length": 770.0625, + "completions/mean_terminated_length": 770.0625, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.9419854963740936, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.87666569854833, + "kl": 0.0180511474609375, + "learning_rate": 1.0927540315551184e-07, + "loss": 0.0271, + "num_tokens": 170744222.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7120234966278076, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.014710744113458313, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14906085343864733, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970787, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3767 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 1264.5, + "completions/mean_terminated_length": 1029.0, + "completions/min_length": 857.0, + "completions/min_terminated_length": 857.0, + "epoch": 0.9422355588897224, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.799727600621826, + "kl": 0.025390625, + "learning_rate": 1.0919622852577214e-07, + "loss": -0.0115, + "num_tokens": 170805766.0, + "reward": 0.0, + "reward_std": 0.9022330641746521, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013479871862183532, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.032428453940437, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6749999999999999, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3768 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1490.0, + "completions/mean_length": 1323.1875, + "completions/mean_terminated_length": 1282.3846435546875, + "completions/min_length": 1116.0, + "completions/min_terminated_length": 1116.0, + "epoch": 0.9424856214053513, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7273343308376297, + "kl": 0.015106201171875, + "learning_rate": 1.0911738977385276e-07, + "loss": -0.0211, + "num_tokens": 170851353.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5132064819335938, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.015779783553798464, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05137081157401174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14291929864761416, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3769 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1416.0, + "completions/max_terminated_length": 1416.0, + "completions/mean_length": 1173.9375, + "completions/mean_terminated_length": 1173.9375, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.9427356839209803, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.430969819780302, + "kl": 0.020050048828125, + "learning_rate": 1.0903888695982625e-07, + "loss": -0.0261, + "num_tokens": 170890736.0, + "reward": 0.0, + "reward_std": 0.7334511280059814, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03126431144847216, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.032060469815444574, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12641788434189796, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3770 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1464.0, + "completions/max_terminated_length": 1464.0, + "completions/mean_length": 1062.25, + "completions/mean_terminated_length": 1062.25, + "completions/min_length": 787.0, + "completions/min_terminated_length": 787.0, + "epoch": 0.9429857464366092, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9340230739419035, + "kl": 0.02630615234375, + "learning_rate": 1.0896072014350898e-07, + "loss": 0.049, + "num_tokens": 170937108.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9963919520378113, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.09906336422547, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10013769145867223, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3771 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1275.9375, + "completions/mean_terminated_length": 1224.2308349609375, + "completions/min_length": 1047.0, + "completions/min_terminated_length": 1047.0, + "epoch": 0.943235808952238, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7506333867340813, + "kl": 0.0166778564453125, + "learning_rate": 1.088828893844616e-07, + "loss": 0.0267, + "num_tokens": 170978715.0, + "reward": 0.0, + "reward_std": 0.9670782089233398, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24555170683341193, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2225388009972844, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06871842709362769, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3772 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1357.3125, + "completions/mean_terminated_length": 1309.75, + "completions/min_length": 1138.0, + "completions/min_terminated_length": 1138.0, + "epoch": 0.943485871467867, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.81801247765991, + "kl": 0.0198974609375, + "learning_rate": 1.0880539474198858e-07, + "loss": 0.0325, + "num_tokens": 171017312.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9843195080757141, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.024773201047297157, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04907057424230647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3773 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1386.5625, + "completions/mean_terminated_length": 1348.75, + "completions/min_length": 1073.0, + "completions/min_terminated_length": 1073.0, + "epoch": 0.9437359339834959, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.316879640723234, + "kl": 0.020751953125, + "learning_rate": 1.0872823627513818e-07, + "loss": -0.0024, + "num_tokens": 171069017.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7426510453224182, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1414183674560033, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14228159964935938, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17207879332187076, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3774 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1471.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1203.25, + "completions/mean_terminated_length": 1203.25, + "completions/min_length": 727.0, + "completions/min_terminated_length": 727.0, + "epoch": 0.9439859964991247, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3798793236825095, + "kl": 0.01947021484375, + "learning_rate": 1.0865141404270264e-07, + "loss": -0.0381, + "num_tokens": 171122053.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9288386106491089, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04283620931120703, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08720665169232816, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16238956361284543, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3775 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1436.0, + "completions/max_terminated_length": 1436.0, + "completions/mean_length": 1075.8125, + "completions/mean_terminated_length": 1075.8125, + "completions/min_length": 800.0, + "completions/min_terminated_length": 800.0, + "epoch": 0.9442360590147537, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.341116763780623, + "kl": 0.018707275390625, + "learning_rate": 1.0857492810321795e-07, + "loss": 0.0266, + "num_tokens": 171153898.0, + "reward": 0.0, + "reward_std": 0.7018105983734131, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20078659627148418, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18712501427812026, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10461569884316813, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3776 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1407.0, + "completions/mean_length": 1206.125, + "completions/mean_terminated_length": 1138.3077392578125, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.9444861215303826, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1965667234860953, + "kl": 0.021240234375, + "learning_rate": 1.0849877851496376e-07, + "loss": -0.0168, + "num_tokens": 171199580.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9335830211639404, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08786865405839507, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1063478795055099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3777 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1413.0, + "completions/max_terminated_length": 1413.0, + "completions/mean_length": 964.3125, + "completions/mean_terminated_length": 964.3125, + "completions/min_length": 669.0, + "completions/min_terminated_length": 669.0, + "epoch": 0.9447361840460115, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.169977463747259, + "kl": 0.0143890380859375, + "learning_rate": 1.0842296533596364e-07, + "loss": 0.0072, + "num_tokens": 171239985.0, + "reward": 0.0, + "reward_std": 1.0121691226959229, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.045179559110796255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08994655103040936, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06440611887195309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3778 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1205.0, + "completions/max_terminated_length": 1205.0, + "completions/mean_length": 974.125, + "completions/mean_terminated_length": 974.125, + "completions/min_length": 834.0, + "completions/min_terminated_length": 834.0, + "epoch": 0.9449862465616404, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8992994415958986, + "kl": 0.01390838623046875, + "learning_rate": 1.0834748862398466e-07, + "loss": -0.0433, + "num_tokens": 171270451.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.062233328819275, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0088782002238202, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0390944221601529, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3779 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1122.1875, + "completions/mean_terminated_length": 1122.1875, + "completions/min_length": 868.0, + "completions/min_terminated_length": 868.0, + "epoch": 0.9452363090772693, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.442484050912438, + "kl": 0.019378662109375, + "learning_rate": 1.0827234843653762e-07, + "loss": -0.0384, + "num_tokens": 171315486.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.0616614818572998, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02285976781513942, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10788602273955877, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08509254221575908, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3780 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1388.0, + "completions/mean_length": 1115.375, + "completions/mean_terminated_length": 1089.7333984375, + "completions/min_length": 736.0, + "completions/min_terminated_length": 736.0, + "epoch": 0.9454863715928983, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.387819926448556, + "kl": 0.026092529296875, + "learning_rate": 1.0819754483087676e-07, + "loss": -0.0569, + "num_tokens": 171353228.0, + "reward": 0.0, + "reward_std": 0.842126190662384, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04581029998966318, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07821664263246376, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12102953419784838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3781 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1415.0, + "completions/mean_length": 1103.0, + "completions/mean_terminated_length": 1076.533447265625, + "completions/min_length": 796.0, + "completions/min_terminated_length": 796.0, + "epoch": 0.9457364341085271, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7616569616761946, + "kl": 0.015106201171875, + "learning_rate": 1.0812307786399997e-07, + "loss": 0.0118, + "num_tokens": 171387972.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9614995718002319, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08392347354420654, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0896963191206291, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09067647005823631, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3782 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1203.0625, + "completions/mean_terminated_length": 1203.0625, + "completions/min_length": 1071.0, + "completions/min_terminated_length": 1071.0, + "epoch": 0.945986496624156, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.939234983773858, + "kl": 0.0147857666015625, + "learning_rate": 1.0804894759264861e-07, + "loss": -0.0164, + "num_tokens": 171429061.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0112570524215698, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01876694064957212, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09852393790587773, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3783 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1330.0, + "completions/mean_terminated_length": 1273.3333740234375, + "completions/min_length": 886.0, + "completions/min_terminated_length": 886.0, + "epoch": 0.946236559139785, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.344954344796532, + "kl": 0.01708984375, + "learning_rate": 1.0797515407330755e-07, + "loss": -0.0294, + "num_tokens": 171485525.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.814410924911499, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00033726908078737274, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039448662958288055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06978803887752091, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3784 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1304.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1047.6875, + "completions/mean_terminated_length": 1047.6875, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.9464866216554139, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.391995977615616, + "kl": 0.0185546875, + "learning_rate": 1.0790169736220489e-07, + "loss": -0.0567, + "num_tokens": 171527120.0, + "reward": 0.0, + "reward_std": 0.6676482558250427, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07633033419907649, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06933643470072381, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3785 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1103.0, + "completions/max_terminated_length": 1103.0, + "completions/mean_length": 824.625, + "completions/mean_terminated_length": 824.625, + "completions/min_length": 587.0, + "completions/min_terminated_length": 587.0, + "epoch": 0.9467366841710427, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.9086985904059945, + "kl": 0.017730712890625, + "learning_rate": 1.0782857751531225e-07, + "loss": -0.037, + "num_tokens": 171555522.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9184122085571289, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.020540494005217014, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08572267950911798, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14707015206910487, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3786 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1215.0, + "completions/mean_length": 1278.25, + "completions/mean_terminated_length": 993.1428833007812, + "completions/min_length": 340.0, + "completions/min_terminated_length": 340.0, + "epoch": 0.9469867466866717, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1945422106774943, + "kl": 0.01995849609375, + "learning_rate": 1.0775579458834453e-07, + "loss": 0.0116, + "num_tokens": 171615054.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5374252796173096, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.3593935867096097, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.4330373182270167, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3787 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1349.0, + "completions/mean_length": 1116.3125, + "completions/mean_terminated_length": 1090.7333984375, + "completions/min_length": 861.0, + "completions/min_terminated_length": 861.0, + "epoch": 0.9472368092023006, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8697315813929514, + "kl": 0.019775390625, + "learning_rate": 1.0768334863675983e-07, + "loss": -0.0243, + "num_tokens": 171651275.0, + "reward": 0.0, + "reward_std": 0.8972643613815308, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.052326076445716185, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1003225470133035, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3788 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1288.4375, + "completions/mean_terminated_length": 1192.272705078125, + "completions/min_length": 656.0, + "completions/min_terminated_length": 656.0, + "epoch": 0.9474868717179294, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3609759695186963, + "kl": 0.024505615234375, + "learning_rate": 1.0761123971575964e-07, + "loss": -0.0701, + "num_tokens": 171708178.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0114654302597046, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2392673311181543, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.294481418510535, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15244914148902494, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3789 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1382.9375, + "completions/mean_terminated_length": 1125.4000244140625, + "completions/min_length": 482.0, + "completions/min_terminated_length": 482.0, + "epoch": 0.9477369342335584, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3571089641901564, + "kl": 0.0101318359375, + "learning_rate": 1.0753946788028859e-07, + "loss": -0.0451, + "num_tokens": 171774601.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0584301948547363, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20646224998989823, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09856311180953081, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3790 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1124.4375, + "completions/mean_terminated_length": 1070.7857666015625, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.9479869967491873, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.606274659446445, + "kl": 0.01556396484375, + "learning_rate": 1.0746803318503434e-07, + "loss": -0.0495, + "num_tokens": 171818648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9715002775192261, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11979258198769531, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.045940147857814306, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0958393717904348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3791 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1366.875, + "completions/mean_terminated_length": 1287.0, + "completions/min_length": 1122.0, + "completions/min_terminated_length": 1122.0, + "epoch": 0.9482370592648162, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3585268139972526, + "kl": 0.0144500732421875, + "learning_rate": 1.0739693568442779e-07, + "loss": -0.009, + "num_tokens": 171874918.0, + "reward": 0.0, + "reward_std": 0.8575986623764038, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08725537249885364, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2263069694320082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07391185942027816, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3792 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1212.375, + "completions/mean_terminated_length": 1193.2000732421875, + "completions/min_length": 879.0, + "completions/min_terminated_length": 879.0, + "epoch": 0.9484871217804451, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7769745345131653, + "kl": 0.01849365234375, + "learning_rate": 1.073261754326429e-07, + "loss": -0.0315, + "num_tokens": 171929620.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.48209255933761597, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.19278347612778146, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2517906125115363, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1134476547592341, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3793 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1447.0, + "completions/mean_length": 1372.375, + "completions/mean_terminated_length": 1314.3636474609375, + "completions/min_length": 1180.0, + "completions/min_terminated_length": 1180.0, + "epoch": 0.948737184296074, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.494823352835216, + "kl": 0.0130157470703125, + "learning_rate": 1.072557524835966e-07, + "loss": 0.0075, + "num_tokens": 171969906.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8527266979217529, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10832208777942944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19407191614664088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3794 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1498.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1136.875, + "completions/mean_terminated_length": 1136.875, + "completions/min_length": 884.0, + "completions/min_terminated_length": 884.0, + "epoch": 0.9489872468117029, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5129359752575, + "kl": 0.0185089111328125, + "learning_rate": 1.0718566689094882e-07, + "loss": -0.0194, + "num_tokens": 172011664.0, + "reward": 0.0, + "reward_std": 0.8911680579185486, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020862686434068224, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.032709250211024554, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16487930490266264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3795 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1361.0, + "completions/mean_length": 1105.125, + "completions/mean_terminated_length": 1048.71435546875, + "completions/min_length": 712.0, + "completions/min_terminated_length": 712.0, + "epoch": 0.9492373093273319, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.05062337438898, + "kl": 0.0181732177734375, + "learning_rate": 1.071159187081025e-07, + "loss": 0.0099, + "num_tokens": 172056330.0, + "reward": 0.0, + "reward_std": 1.0518404245376587, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08026418071463552, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09487506746265001, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3796 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1482.0, + "completions/mean_length": 1273.25, + "completions/mean_terminated_length": 1240.857177734375, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.9494873718429607, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.138055143771168, + "kl": 0.015411376953125, + "learning_rate": 1.070465079882034e-07, + "loss": 0.0078, + "num_tokens": 172109558.0, + "reward": 0.0, + "reward_std": 0.6458609104156494, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.009151789543030073, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2528242450383896, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3797 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1442.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1247.0625, + "completions/mean_terminated_length": 1247.0625, + "completions/min_length": 1037.0, + "completions/min_terminated_length": 1037.0, + "epoch": 0.9497374343585896, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.282719988488555, + "kl": 0.0225830078125, + "learning_rate": 1.0697743478414019e-07, + "loss": -0.0261, + "num_tokens": 172152767.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.0035452842712402, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044945752690506255, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05276541059766826, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902596, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3798 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1283.125, + "completions/mean_terminated_length": 1252.1429443359375, + "completions/min_length": 908.0, + "completions/min_terminated_length": 908.0, + "epoch": 0.9499874968742186, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.672258371848487, + "kl": 0.0133209228515625, + "learning_rate": 1.0690869914854435e-07, + "loss": -0.0096, + "num_tokens": 172208529.0, + "reward": 0.0, + "reward_std": 0.8666238784790039, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06366595919609876, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0709824913069525, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3799 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1081.625, + "completions/mean_terminated_length": 1081.625, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.9502375593898474, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5352833775344297, + "kl": 0.02557373046875, + "learning_rate": 1.0684030113379015e-07, + "loss": -0.028, + "num_tokens": 172254219.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8791790008544922, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07463098282298919, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08725758015801707, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3800 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1348.25, + "completions/mean_terminated_length": 1257.2000732421875, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.9504876219054764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.116338320658924, + "kl": 0.020416259765625, + "learning_rate": 1.0677224079199465e-07, + "loss": -0.0671, + "num_tokens": 172312463.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6294140815734863, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10259811209423389, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.16710204592146236, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17164778752714224, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3801 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1067.25, + "completions/mean_terminated_length": 1038.4000244140625, + "completions/min_length": 816.0, + "completions/min_terminated_length": 816.0, + "epoch": 0.9507376844211053, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.577416876571747, + "kl": 0.01132965087890625, + "learning_rate": 1.0670451817501745e-07, + "loss": 0.0183, + "num_tokens": 172357979.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8796327114105225, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11423011332034774, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.088364453796898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036264, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3802 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1385.0, + "completions/max_terminated_length": 1385.0, + "completions/mean_length": 928.5625, + "completions/mean_terminated_length": 928.5625, + "completions/min_length": 724.0, + "completions/min_terminated_length": 724.0, + "epoch": 0.9509877469367342, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.461180877802579, + "kl": 0.0185089111328125, + "learning_rate": 1.066371333344611e-07, + "loss": 0.0336, + "num_tokens": 172402932.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8940681219100952, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02766826359299442, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17115210728447663, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10878112581387149, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3803 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1285.0, + "completions/mean_length": 1343.0, + "completions/mean_terminated_length": 1141.1429443359375, + "completions/min_length": 996.0, + "completions/min_terminated_length": 996.0, + "epoch": 0.9512378094523631, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3942993386435596, + "kl": 0.0218505859375, + "learning_rate": 1.0657008632167044e-07, + "loss": -0.0231, + "num_tokens": 172457084.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9383769035339355, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.021533606062153956, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14956249400653068, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12464765155042849, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3804 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1494.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1230.375, + "completions/mean_terminated_length": 1230.375, + "completions/min_length": 942.0, + "completions/min_terminated_length": 942.0, + "epoch": 0.951487871967992, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.145843434164538, + "kl": 0.02117919921875, + "learning_rate": 1.0650337718773318e-07, + "loss": -0.0433, + "num_tokens": 172507578.0, + "reward": 0.0, + "reward_std": 0.7124574780464172, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.044268238293336155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05725940177238088, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15533714826025885, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3805 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1172.375, + "completions/mean_terminated_length": 1150.533447265625, + "completions/min_length": 682.0, + "completions/min_terminated_length": 682.0, + "epoch": 0.9517379344836209, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.404299054203463, + "kl": 0.02301025390625, + "learning_rate": 1.0643700598347941e-07, + "loss": -0.0646, + "num_tokens": 172548336.0, + "reward": 0.0, + "reward_std": 0.8502640128135681, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.12291507899716062, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08524075293318999, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16815997674172586, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3806 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1354.0, + "completions/max_terminated_length": 1354.0, + "completions/mean_length": 1002.6875, + "completions/mean_terminated_length": 1002.6875, + "completions/min_length": 531.0, + "completions/min_terminated_length": 531.0, + "epoch": 0.9519879969992499, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2429872687072443, + "kl": 0.02215576171875, + "learning_rate": 1.0637097275948183e-07, + "loss": 0.003, + "num_tokens": 172579171.0, + "reward": 0.0, + "reward_std": 0.9476653337478638, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006657933036615681, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.062264723066517105, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298363, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3807 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1350.0, + "completions/mean_length": 1466.0, + "completions/mean_terminated_length": 1228.0, + "completions/min_length": 1106.0, + "completions/min_terminated_length": 1106.0, + "epoch": 0.9522380595148787, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.201972460843035, + "kl": 0.0140838623046875, + "learning_rate": 1.0630527756605548e-07, + "loss": 0.0258, + "num_tokens": 172637435.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9643556475639343, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07487054606831513, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06814273889739904, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1753303759784389, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3808 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1114.0, + "completions/max_terminated_length": 1114.0, + "completions/mean_length": 847.875, + "completions/mean_terminated_length": 847.875, + "completions/min_length": 667.0, + "completions/min_terminated_length": 667.0, + "epoch": 0.9524881220305076, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2706361430587823, + "kl": 0.0111541748046875, + "learning_rate": 1.0623992045325803e-07, + "loss": -0.0226, + "num_tokens": 172676777.0, + "reward": 0.0, + "reward_std": 0.9587114453315735, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03259585239161066, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06813331475500407, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3809 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1245.0, + "completions/max_terminated_length": 1245.0, + "completions/mean_length": 1056.875, + "completions/mean_terminated_length": 1056.875, + "completions/min_length": 633.0, + "completions/min_terminated_length": 633.0, + "epoch": 0.9527381845461366, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2407808798040665, + "kl": 0.0116424560546875, + "learning_rate": 1.0617490147088931e-07, + "loss": -0.0394, + "num_tokens": 172712831.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8464981317520142, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019792513346274295, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07714626037879982, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07252075054258102, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3810 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1422.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1106.5625, + "completions/mean_terminated_length": 1106.5625, + "completions/min_length": 901.0, + "completions/min_terminated_length": 901.0, + "epoch": 0.9529882470617654, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.260440425550093, + "kl": 0.02154541015625, + "learning_rate": 1.061102206684917e-07, + "loss": -0.0645, + "num_tokens": 172745744.0, + "reward": 0.0, + "reward_std": 0.9132782816886902, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04234703296452713, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04860988117433075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3811 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1462.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1089.0, + "completions/mean_terminated_length": 1089.0, + "completions/min_length": 767.0, + "completions/min_terminated_length": 767.0, + "epoch": 0.9532383095773943, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.124188927401488, + "kl": 0.017608642578125, + "learning_rate": 1.0604587809534976e-07, + "loss": -0.0132, + "num_tokens": 172779568.0, + "reward": -7.450580596923828e-09, + "reward_std": 0.9570242166519165, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0712055994370714, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05473220789794189, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818419, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3812 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1093.0, + "completions/mean_terminated_length": 1065.86669921875, + "completions/min_length": 788.0, + "completions/min_terminated_length": 788.0, + "epoch": 0.9534883720930233, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.412917288793452, + "kl": 0.00948333740234375, + "learning_rate": 1.0598187380049038e-07, + "loss": -0.0123, + "num_tokens": 172827288.0, + "reward": 0.0, + "reward_std": 0.7437519431114197, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04959622381689802, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.22423437999240803, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13871099718746435, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3813 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1305.8125, + "completions/mean_terminated_length": 1189.300048828125, + "completions/min_length": 940.0, + "completions/min_terminated_length": 940.0, + "epoch": 0.9537384346086522, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5055879233792293, + "kl": 0.0158538818359375, + "learning_rate": 1.0591820783268283e-07, + "loss": -0.047, + "num_tokens": 172874157.0, + "reward": 0.0, + "reward_std": 0.8169389367103577, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0170776016782556, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.060515174264125504, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08421753138505425, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3814 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1021.9375, + "completions/mean_terminated_length": 911.6154174804688, + "completions/min_length": 450.0, + "completions/min_terminated_length": 450.0, + "epoch": 0.953988497124281, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7730404771875734, + "kl": 0.01541900634765625, + "learning_rate": 1.0585488024043837e-07, + "loss": -0.1017, + "num_tokens": 172918068.0, + "reward": 0.0, + "reward_std": 0.9344015121459961, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03245667034851222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09531796721027949, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3815 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1328.0, + "completions/max_terminated_length": 1328.0, + "completions/mean_length": 971.8125, + "completions/mean_terminated_length": 971.8125, + "completions/min_length": 625.0, + "completions/min_terminated_length": 625.0, + "epoch": 0.95423855963991, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7363403419959402, + "kl": 0.02880859375, + "learning_rate": 1.0579189107201055e-07, + "loss": -0.0661, + "num_tokens": 172947473.0, + "reward": 0.0, + "reward_std": 0.8471357822418213, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07814581830869166, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1430769983444435, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333334, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3816 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1129.75, + "completions/mean_terminated_length": 907.6000366210938, + "completions/min_length": 617.0, + "completions/min_terminated_length": 617.0, + "epoch": 0.9544886221555389, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.985219877484492, + "kl": 0.020050048828125, + "learning_rate": 1.0572924037539495e-07, + "loss": -0.0703, + "num_tokens": 172998869.0, + "reward": 0.0, + "reward_std": 0.7150782346725464, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01243990356674279, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07198625203277192, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3817 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1351.0, + "completions/max_terminated_length": 1351.0, + "completions/mean_length": 1021.5, + "completions/mean_terminated_length": 1021.5, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.9547386846711677, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.673105070079273, + "kl": 0.02496337890625, + "learning_rate": 1.056669281983294e-07, + "loss": -0.0145, + "num_tokens": 173041045.0, + "reward": -1.1175870895385742e-08, + "reward_std": 1.063668131828308, + "rewards/wordcountpos_reward_GEOBench/mean": -1.1175870895385742e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06138807590716812, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13238199247587765, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11409872268574492, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3818 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1324.25, + "completions/mean_terminated_length": 1299.1429443359375, + "completions/min_length": 874.0, + "completions/min_terminated_length": 874.0, + "epoch": 0.9549887471867967, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.157121314520134, + "kl": 0.022613525390625, + "learning_rate": 1.056049545882937e-07, + "loss": -0.0422, + "num_tokens": 173090961.0, + "reward": 0.0, + "reward_std": 1.0424120426177979, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03367083938407522, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07860657077294778, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3819 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1201.25, + "completions/mean_terminated_length": 1101.666748046875, + "completions/min_length": 726.0, + "completions/min_terminated_length": 726.0, + "epoch": 0.9552388097024256, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.443479844300672, + "kl": 0.031097412109375, + "learning_rate": 1.055433195925096e-07, + "loss": 0.0336, + "num_tokens": 173144781.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7275317907333374, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0019235942819046106, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.046128675019707736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15563490039905004, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3820 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1417.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1218.0625, + "completions/mean_terminated_length": 1218.0625, + "completions/min_length": 981.0, + "completions/min_terminated_length": 981.0, + "epoch": 0.9554888722180546, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.394363429471651, + "kl": 0.021484375, + "learning_rate": 1.0548202325794106e-07, + "loss": 0.0097, + "num_tokens": 173191590.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0649147033691406, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016330211174757935, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07115477532561941, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1377060745318193, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3821 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 1335.4375, + "completions/mean_terminated_length": 1280.5833740234375, + "completions/min_length": 1075.0, + "completions/min_terminated_length": 1075.0, + "epoch": 0.9557389347336834, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3212494611422696, + "kl": 0.01141357421875, + "learning_rate": 1.0542106563129368e-07, + "loss": -0.0105, + "num_tokens": 173240381.0, + "reward": 0.0, + "reward_std": 0.7216614484786987, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03965837037543832, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.18880017250762546, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12171612389003693, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3822 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1143.375, + "completions/mean_terminated_length": 1061.076904296875, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 0.9559889972493123, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3796285041534992, + "kl": 0.023040771484375, + "learning_rate": 1.0536044675901533e-07, + "loss": -0.0336, + "num_tokens": 173279523.0, + "reward": 0.0, + "reward_std": 0.5529353618621826, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08685872059444542, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10597202169197194, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12464765155042849, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3823 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1432.0, + "completions/mean_length": 1117.1875, + "completions/mean_terminated_length": 989.5833740234375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.9562390597649413, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.002687386944794, + "kl": 0.024139404296875, + "learning_rate": 1.0530016668729537e-07, + "loss": 0.0079, + "num_tokens": 173328910.0, + "reward": 0.0, + "reward_std": 1.0681593418121338, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03534745407890144, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1361831967827029, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08388704928078612, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3824 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1448.0, + "completions/mean_length": 1372.5625, + "completions/mean_terminated_length": 1245.125, + "completions/min_length": 1034.0, + "completions/min_terminated_length": 1034.0, + "epoch": 0.9564891222805701, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.742512540441348, + "kl": 0.017303466796875, + "learning_rate": 1.0524022546206537e-07, + "loss": -0.0118, + "num_tokens": 173381359.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8766384720802307, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04908626880447284, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09833558207912721, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3825 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1487.0, + "completions/max_terminated_length": 1487.0, + "completions/mean_length": 1079.3125, + "completions/mean_terminated_length": 1079.3125, + "completions/min_length": 555.0, + "completions/min_terminated_length": 555.0, + "epoch": 0.956739184796199, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6020866988721316, + "kl": 0.01690673828125, + "learning_rate": 1.0518062312899852e-07, + "loss": 0.0136, + "num_tokens": 173420980.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9898308515548706, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.12207936115279537, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11770942988411713, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11792967144619461, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3826 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1199.0, + "completions/mean_terminated_length": 1199.0, + "completions/min_length": 393.0, + "completions/min_terminated_length": 393.0, + "epoch": 0.956989247311828, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0954312595393327, + "kl": 0.0198211669921875, + "learning_rate": 1.0512135973350976e-07, + "loss": -0.1252, + "num_tokens": 173460188.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.041907787322998, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.02497211110602738, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0632716166191165, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185827, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3827 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1363.4375, + "completions/mean_terminated_length": 1301.3636474609375, + "completions/min_length": 969.0, + "completions/min_terminated_length": 969.0, + "epoch": 0.9572393098274569, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1645691934840436, + "kl": 0.01995849609375, + "learning_rate": 1.0506243532075589e-07, + "loss": 0.0182, + "num_tokens": 173506923.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7643247842788696, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05839399135057993, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.041724058753611966, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12224747213928167, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3828 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1277.375, + "completions/mean_terminated_length": 1262.533447265625, + "completions/min_length": 1007.0, + "completions/min_terminated_length": 1007.0, + "epoch": 0.9574893723430857, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3644260739396223, + "kl": 0.0196990966796875, + "learning_rate": 1.0500384993563532e-07, + "loss": -0.0149, + "num_tokens": 173554273.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.8792661428451538, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01697895553325053, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07463251330496276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09651328828101766, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3829 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1437.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1151.0625, + "completions/mean_terminated_length": 1151.0625, + "completions/min_length": 849.0, + "completions/min_terminated_length": 849.0, + "epoch": 0.9577394348587147, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.437815578109961, + "kl": 0.023895263671875, + "learning_rate": 1.0494560362278818e-07, + "loss": 0.0565, + "num_tokens": 173605658.0, + "reward": 0.0, + "reward_std": 0.9306818842887878, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021003559805452092, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04809633706639413, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066471, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3830 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1284.625, + "completions/mean_terminated_length": 1186.727294921875, + "completions/min_length": 897.0, + "completions/min_terminated_length": 897.0, + "epoch": 0.9579894973743436, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0183295714478784, + "kl": 0.017425537109375, + "learning_rate": 1.0488769642659618e-07, + "loss": -0.0079, + "num_tokens": 173659420.0, + "reward": 0.0, + "reward_std": 0.19549036026000977, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00859053393085808, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13522408367090424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13662601021279466, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3831 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1255.8125, + "completions/mean_terminated_length": 1109.300048828125, + "completions/min_length": 700.0, + "completions/min_terminated_length": 700.0, + "epoch": 0.9582395598899724, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0335018143682317, + "kl": 0.01422119140625, + "learning_rate": 1.0483012839118276e-07, + "loss": -0.0432, + "num_tokens": 173709585.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0150920152664185, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.16752210800982292, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.300362747652111, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15049301694147857, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3832 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1329.0, + "completions/max_terminated_length": 1329.0, + "completions/mean_length": 1045.375, + "completions/mean_terminated_length": 1045.375, + "completions/min_length": 766.0, + "completions/min_terminated_length": 766.0, + "epoch": 0.9584896224056014, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.501766971183797, + "kl": 0.01922607421875, + "learning_rate": 1.0477289956041285e-07, + "loss": 0.0075, + "num_tokens": 173755607.0, + "reward": 0.0, + "reward_std": 1.0654603242874146, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0274820214606731, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06832542772935288, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3833 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1313.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 963.9375, + "completions/mean_terminated_length": 963.9375, + "completions/min_length": 745.0, + "completions/min_terminated_length": 745.0, + "epoch": 0.9587396849212303, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.643801299623321, + "kl": 0.0175018310546875, + "learning_rate": 1.0471600997789279e-07, + "loss": -0.065, + "num_tokens": 173807910.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8040734529495239, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.006107045738752596, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0912323146488055, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666117, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3834 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1215.0625, + "completions/mean_terminated_length": 993.4444580078125, + "completions/min_length": 799.0, + "completions/min_terminated_length": 799.0, + "epoch": 0.9589897474368592, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1484762037792975, + "kl": 0.0158538818359375, + "learning_rate": 1.0465945968697064e-07, + "loss": 0.042, + "num_tokens": 173858919.0, + "reward": 0.0, + "reward_std": 0.5646531581878662, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11856945875948573, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.131032502133974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15371932093796678, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3835 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1262.875, + "completions/mean_terminated_length": 1208.1539306640625, + "completions/min_length": 837.0, + "completions/min_terminated_length": 837.0, + "epoch": 0.9592398099524881, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.144513217700684, + "kl": 0.024688720703125, + "learning_rate": 1.0460324873073584e-07, + "loss": 0.0706, + "num_tokens": 173908565.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7290467023849487, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11808510802498333, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1356820475655431, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11279282877125756, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3836 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1459.0, + "completions/mean_length": 1147.1875, + "completions/mean_terminated_length": 1096.7857666015625, + "completions/min_length": 838.0, + "completions/min_terminated_length": 838.0, + "epoch": 0.959489872468117, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.88795880297601, + "kl": 0.0176849365234375, + "learning_rate": 1.0454737715201927e-07, + "loss": -0.0509, + "num_tokens": 173949984.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.2869378924369812, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10041862485164349, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1870249863151274, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3837 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1485.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1226.6875, + "completions/mean_terminated_length": 1226.6875, + "completions/min_length": 1079.0, + "completions/min_terminated_length": 1079.0, + "epoch": 0.959739934983746, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.7186487107001294, + "kl": 0.01248931884765625, + "learning_rate": 1.044918449933932e-07, + "loss": -0.0088, + "num_tokens": 173995883.0, + "reward": 0.0, + "reward_std": 0.912563681602478, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08856075827021953, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07436737884551176, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3838 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.8125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1481.875, + "completions/mean_terminated_length": 1403.3333740234375, + "completions/min_length": 1333.0, + "completions/min_terminated_length": 1333.0, + "epoch": 0.9599899974993749, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.279330950820505, + "kl": 0.0133209228515625, + "learning_rate": 1.0443665229717135e-07, + "loss": -0.0026, + "num_tokens": 174055505.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.0660961866378784, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08907863796567171, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08827326175428607, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3839 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1323.125, + "completions/mean_terminated_length": 1217.0, + "completions/min_length": 514.0, + "completions/min_terminated_length": 514.0, + "epoch": 0.9602400600150037, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.739580333269062, + "kl": 0.01416015625, + "learning_rate": 1.0438179910540862e-07, + "loss": -0.0178, + "num_tokens": 174116179.0, + "reward": 0.0, + "reward_std": 0.7369815111160278, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07378639813691626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1213932818549499, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620105, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3840 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1227.0, + "completions/max_terminated_length": 1227.0, + "completions/mean_length": 962.5625, + "completions/mean_terminated_length": 962.5625, + "completions/min_length": 632.0, + "completions/min_terminated_length": 632.0, + "epoch": 0.9604901225306327, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.699780141975631, + "kl": 0.0224609375, + "learning_rate": 1.0432728545990134e-07, + "loss": -0.0006, + "num_tokens": 174160348.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.986216127872467, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10628274138571238, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08153499750207727, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3841 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1471.0, + "completions/mean_length": 1304.5625, + "completions/mean_terminated_length": 1276.6429443359375, + "completions/min_length": 997.0, + "completions/min_terminated_length": 997.0, + "epoch": 0.9607401850462616, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.054103883975406, + "kl": 0.022003173828125, + "learning_rate": 1.042731114021872e-07, + "loss": 0.0377, + "num_tokens": 174213109.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.27061519026756287, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.047381751353146304, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.21549212865348732, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18216801361705953, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3842 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1138.75, + "completions/mean_terminated_length": 1114.666748046875, + "completions/min_length": 885.0, + "completions/min_terminated_length": 885.0, + "epoch": 0.9609902475618904, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3530874399142414, + "kl": 0.02294921875, + "learning_rate": 1.0421927697354491e-07, + "loss": -0.0592, + "num_tokens": 174256017.0, + "reward": 0.0, + "reward_std": 0.7898848056793213, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.038059658849294464, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06878591449255782, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3843 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1445.25, + "completions/mean_terminated_length": 1374.857177734375, + "completions/min_length": 1220.0, + "completions/min_terminated_length": 1220.0, + "epoch": 0.9612403100775194, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4676408884717804, + "kl": 0.0172119140625, + "learning_rate": 1.0416578221499454e-07, + "loss": -0.0133, + "num_tokens": 174303109.0, + "reward": 0.0, + "reward_std": 1.0303678512573242, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1043393063733017, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17290275761460647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08595864638818419, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3844 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1292.8125, + "completions/mean_terminated_length": 1223.75, + "completions/min_length": 844.0, + "completions/min_terminated_length": 844.0, + "epoch": 0.9614903725931483, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.844224656384447, + "kl": 0.018524169921875, + "learning_rate": 1.0411262716729736e-07, + "loss": 0.0279, + "num_tokens": 174344530.0, + "reward": 0.0, + "reward_std": 0.6866085529327393, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.2818656941908776, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3767098566242801, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3845 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1494.0, + "completions/mean_length": 1252.9375, + "completions/mean_terminated_length": 1140.6363525390625, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.9617404351087772, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.000545707159687, + "kl": 0.01708984375, + "learning_rate": 1.0405981187095572e-07, + "loss": -0.0364, + "num_tokens": 174398609.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0010253190994263, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03416663890101791, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04549563489294919, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07588978362901862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3846 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1466.0, + "completions/mean_length": 1311.25, + "completions/mean_terminated_length": 1225.45458984375, + "completions/min_length": 1041.0, + "completions/min_terminated_length": 1041.0, + "epoch": 0.9619904976244061, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.98478618660373, + "kl": 0.0184478759765625, + "learning_rate": 1.0400733636621316e-07, + "loss": 0.0167, + "num_tokens": 174452589.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7024484276771545, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02817845922060564, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06657627984907762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3847 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1361.1875, + "completions/mean_terminated_length": 1298.0909423828125, + "completions/min_length": 1083.0, + "completions/min_terminated_length": 1083.0, + "epoch": 0.962240560140035, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.11685650560524, + "kl": 0.01934814453125, + "learning_rate": 1.0395520069305417e-07, + "loss": 0.0205, + "num_tokens": 174499648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6205862760543823, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.044238421530452, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06491986342292541, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12931443160847217, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3848 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1156.0, + "completions/mean_length": 1261.0, + "completions/mean_terminated_length": 1022.0, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.9624906226556639, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3362407490230677, + "kl": 0.02496337890625, + "learning_rate": 1.039034048912045e-07, + "loss": -0.0133, + "num_tokens": 174552072.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0019835233688354, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05742211168239691, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03826313392282281, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14240006242195888, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3849 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1196.0, + "completions/max_terminated_length": 1196.0, + "completions/mean_length": 822.875, + "completions/mean_terminated_length": 822.875, + "completions/min_length": 612.0, + "completions/min_terminated_length": 612.0, + "epoch": 0.9627406851712929, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.306144476169679, + "kl": 0.02960205078125, + "learning_rate": 1.0385194900013084e-07, + "loss": -0.0255, + "num_tokens": 174591222.0, + "reward": 0.0, + "reward_std": 1.0540485382080078, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06121221029050839, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14368842862833084, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3850 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1287.0, + "completions/mean_length": 1169.8125, + "completions/mean_terminated_length": 1093.615478515625, + "completions/min_length": 740.0, + "completions/min_terminated_length": 740.0, + "epoch": 0.9629907476869217, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1468661667484787, + "kl": 0.019744873046875, + "learning_rate": 1.0380083305904076e-07, + "loss": 0.0067, + "num_tokens": 174637155.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8402243256568909, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07533399279481234, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06199252085363856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3851 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1408.0, + "completions/mean_length": 1326.4375, + "completions/mean_terminated_length": 1191.4444580078125, + "completions/min_length": 1003.0, + "completions/min_terminated_length": 1003.0, + "epoch": 0.9632408102025506, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.640709751322593, + "kl": 0.014678955078125, + "learning_rate": 1.03750057106883e-07, + "loss": 0.0042, + "num_tokens": 174682818.0, + "reward": 0.0, + "reward_std": 0.9733672738075256, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08894021336661323, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05803105223491756, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970786, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3852 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1432.75, + "completions/mean_terminated_length": 1231.0, + "completions/min_length": 1024.0, + "completions/min_terminated_length": 1024.0, + "epoch": 0.9634908727181796, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1461614089000802, + "kl": 0.0104522705078125, + "learning_rate": 1.036996211823471e-07, + "loss": 0.0068, + "num_tokens": 174741094.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8507846593856812, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07641322172266851, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1399166362082524, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.18170998464178714, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3853 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1188.9375, + "completions/mean_terminated_length": 1047.5455322265625, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.9637409352338084, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5046785671892313, + "kl": 0.021636962890625, + "learning_rate": 1.0364952532386359e-07, + "loss": 0.0259, + "num_tokens": 174796901.0, + "reward": 0.0, + "reward_std": 1.0272912979125977, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0710879906023446, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09162145944830533, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.131021626713557, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3854 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1427.0, + "completions/max_terminated_length": 1427.0, + "completions/mean_length": 1063.75, + "completions/mean_terminated_length": 1063.75, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.9639909977494373, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.224923802882648, + "kl": 0.0132293701171875, + "learning_rate": 1.0359976956960378e-07, + "loss": -0.0396, + "num_tokens": 174831881.0, + "reward": 0.0, + "reward_std": 0.9489963054656982, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08319234680412152, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06499221671886424, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06206328908341753, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3855 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1461.0, + "completions/mean_length": 1321.125, + "completions/mean_terminated_length": 1182.0, + "completions/min_length": 862.0, + "completions/min_terminated_length": 862.0, + "epoch": 0.9642410602650663, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3268108675185504, + "kl": 0.01324462890625, + "learning_rate": 1.0355035395748004e-07, + "loss": 0.016, + "num_tokens": 174876115.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6029912233352661, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0011733761275487115, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15287317569169945, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13305526559931294, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3856 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1329.875, + "completions/mean_terminated_length": 1273.166748046875, + "completions/min_length": 1093.0, + "completions/min_terminated_length": 1093.0, + "epoch": 0.9644911227806952, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.736614859631034, + "kl": 0.013641357421875, + "learning_rate": 1.0350127852514535e-07, + "loss": -0.0202, + "num_tokens": 174926017.0, + "reward": 0.0, + "reward_std": 0.9627690315246582, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01074726411889879, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.034257557477156135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7708333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.19163043135739746, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3857 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1314.0, + "completions/mean_length": 1128.1875, + "completions/mean_terminated_length": 1103.4000244140625, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.9647411852963241, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3924596154741233, + "kl": 0.021636962890625, + "learning_rate": 1.0345254330999353e-07, + "loss": 0.043, + "num_tokens": 174959772.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0676445960998535, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03959588160880203, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08074748067484606, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07097208632298362, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3858 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1316.1875, + "completions/mean_terminated_length": 1205.9000244140625, + "completions/min_length": 911.0, + "completions/min_terminated_length": 911.0, + "epoch": 0.964991247811953, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.298522298212393, + "kl": 0.02178955078125, + "learning_rate": 1.0340414834915913e-07, + "loss": -0.0168, + "num_tokens": 175013607.0, + "reward": -2.60770320892334e-08, + "reward_std": 1.0611785650253296, + "rewards/wordcountpos_reward_GEOBench/mean": -2.60770320892334e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06776800883056988, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10852090070175209, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715323, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3859 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1219.9375, + "completions/mean_terminated_length": 1201.2667236328125, + "completions/min_length": 791.0, + "completions/min_terminated_length": 791.0, + "epoch": 0.9652413103275819, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.24840955826432, + "kl": 0.0277099609375, + "learning_rate": 1.0335609367951765e-07, + "loss": -0.0255, + "num_tokens": 175062398.0, + "reward": 0.0, + "reward_std": 0.6940068602561951, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07376410269665663, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06409354331184164, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1102186379345533, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3860 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1355.875, + "completions/mean_terminated_length": 1243.77783203125, + "completions/min_length": 1081.0, + "completions/min_terminated_length": 1081.0, + "epoch": 0.9654913728432108, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9826515816395194, + "kl": 0.01873779296875, + "learning_rate": 1.033083793376851e-07, + "loss": -0.012, + "num_tokens": 175110564.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.675559401512146, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.012737495298943129, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11448736041491427, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09098229375970789, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3861 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1352.4375, + "completions/mean_terminated_length": 1303.25, + "completions/min_length": 926.0, + "completions/min_terminated_length": 926.0, + "epoch": 0.9657414353588397, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7878054951839215, + "kl": 0.014007568359375, + "learning_rate": 1.0326100536001816e-07, + "loss": -0.0507, + "num_tokens": 175155499.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8931111097335815, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0329996225795048, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08532797489364194, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567835, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3862 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1302.8125, + "completions/mean_terminated_length": 1289.666748046875, + "completions/min_length": 1166.0, + "completions/min_terminated_length": 1166.0, + "epoch": 0.9659914978744686, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1105825761401156, + "kl": 0.0149383544921875, + "learning_rate": 1.0321397178261429e-07, + "loss": 0.0186, + "num_tokens": 175203376.0, + "reward": 0.0, + "reward_std": 1.0455527305603027, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00916638133016225, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03335829087573954, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639732, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3863 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1253.0, + "completions/max_terminated_length": 1253.0, + "completions/mean_length": 958.375, + "completions/mean_terminated_length": 958.375, + "completions/min_length": 649.0, + "completions/min_terminated_length": 649.0, + "epoch": 0.9662415603900976, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.111527528097436, + "kl": 0.023529052734375, + "learning_rate": 1.0316727864131154e-07, + "loss": 0.003, + "num_tokens": 175239990.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7932150363922119, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07368070589897165, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06840180508221215, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12041594578792297, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3864 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1338.3125, + "completions/mean_terminated_length": 1264.8182373046875, + "completions/min_length": 987.0, + "completions/min_terminated_length": 987.0, + "epoch": 0.9664916229057264, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0607779186880433, + "kl": 0.0157318115234375, + "learning_rate": 1.0312092597168844e-07, + "loss": -0.0053, + "num_tokens": 175279571.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0582246780395508, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05800022656894397, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11862755112453814, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11894598836509011, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3865 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1474.0, + "completions/mean_length": 1219.0625, + "completions/mean_terminated_length": 1125.416748046875, + "completions/min_length": 642.0, + "completions/min_terminated_length": 642.0, + "epoch": 0.9667416854213553, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.748688714191944, + "kl": 0.0148773193359375, + "learning_rate": 1.0307491380906422e-07, + "loss": -0.0669, + "num_tokens": 175318348.0, + "reward": 0.0, + "reward_std": 0.7691434621810913, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04900774744992282, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08036584081961401, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13984117975602023, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3866 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1336.3125, + "completions/mean_terminated_length": 1281.75, + "completions/min_length": 1006.0, + "completions/min_terminated_length": 1006.0, + "epoch": 0.9669917479369843, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0484186661646677, + "kl": 0.0213623046875, + "learning_rate": 1.0302924218849867e-07, + "loss": 0.0311, + "num_tokens": 175371561.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0509467124938965, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010340400220733638, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04519867723213312, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08766518798921945, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3867 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1274.875, + "completions/mean_terminated_length": 1049.75, + "completions/min_length": 856.0, + "completions/min_terminated_length": 856.0, + "epoch": 0.9672418104526131, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.831906952745982, + "kl": 0.0149993896484375, + "learning_rate": 1.0298391114479198e-07, + "loss": -0.0326, + "num_tokens": 175419791.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9536529183387756, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0887438029487698, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06857355322558202, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3868 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1376.0, + "completions/max_terminated_length": 1376.0, + "completions/mean_length": 1072.0625, + "completions/mean_terminated_length": 1072.0625, + "completions/min_length": 695.0, + "completions/min_terminated_length": 695.0, + "epoch": 0.967491872968242, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.512618851263117, + "kl": 0.02783203125, + "learning_rate": 1.0293892071248492e-07, + "loss": 0.0067, + "num_tokens": 175461776.0, + "reward": 0.0, + "reward_std": 0.6014959812164307, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.030219703179968905, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15045300593076813, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8291666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3869 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1165.25, + "completions/mean_terminated_length": 1142.933349609375, + "completions/min_length": 806.0, + "completions/min_terminated_length": 806.0, + "epoch": 0.967741935483871, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.058448086017135, + "kl": 0.01023101806640625, + "learning_rate": 1.0289427092585873e-07, + "loss": -0.0205, + "num_tokens": 175517764.0, + "reward": 0.0, + "reward_std": 0.942789614200592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20475885228051363, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12116165853770902, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460884, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3870 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1373.1875, + "completions/mean_terminated_length": 1315.5455322265625, + "completions/min_length": 1068.0, + "completions/min_terminated_length": 1068.0, + "epoch": 0.9679919979994999, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.039248704759692, + "kl": 0.022491455078125, + "learning_rate": 1.0284996181893503e-07, + "loss": -0.0012, + "num_tokens": 175559655.0, + "reward": 0.0, + "reward_std": 0.8622609376907349, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.00428596698292362, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13549410491124791, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941136, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3871 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1360.0, + "completions/mean_length": 1134.5, + "completions/mean_terminated_length": 1082.2857666015625, + "completions/min_length": 792.0, + "completions/min_terminated_length": 792.0, + "epoch": 0.9682420605151287, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2288371410868906, + "kl": 0.0168914794921875, + "learning_rate": 1.0280599342547587e-07, + "loss": 0.0344, + "num_tokens": 175593383.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0067131519317627, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040127307861546085, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05122488053880828, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.059004080210452226, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3872 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1367.0, + "completions/mean_length": 1286.375, + "completions/mean_terminated_length": 1120.2222900390625, + "completions/min_length": 810.0, + "completions/min_terminated_length": 810.0, + "epoch": 0.9684921230307577, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0065845426507662, + "kl": 0.0242462158203125, + "learning_rate": 1.0276236577898371e-07, + "loss": -0.0828, + "num_tokens": 175656997.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0179150104522705, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008991172729390443, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08459566483268736, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09878896324620104, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3873 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1445.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1174.5, + "completions/mean_terminated_length": 1174.5, + "completions/min_length": 1027.0, + "completions/min_terminated_length": 1027.0, + "epoch": 0.9687421855463866, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.384019767819348, + "kl": 0.01739501953125, + "learning_rate": 1.0271907891270145e-07, + "loss": -0.0174, + "num_tokens": 175703261.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.053987979888916, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.016031901218280388, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.053934769394769856, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7333333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725114, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3874 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1162.5, + "completions/mean_terminated_length": 1084.615478515625, + "completions/min_length": 795.0, + "completions/min_terminated_length": 795.0, + "epoch": 0.9689922480620154, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3811963307341806, + "kl": 0.019012451171875, + "learning_rate": 1.0267613285961221e-07, + "loss": 0.006, + "num_tokens": 175756365.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0462446212768555, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1445206371460932, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08842457335878869, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11344765475923412, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3875 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1417.0, + "completions/mean_length": 1134.0625, + "completions/mean_terminated_length": 1081.7857666015625, + "completions/min_length": 918.0, + "completions/min_terminated_length": 918.0, + "epoch": 0.9692423105776444, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7647321624878867, + "kl": 0.019561767578125, + "learning_rate": 1.0263352765243931e-07, + "loss": 0.0376, + "num_tokens": 175806870.0, + "reward": 0.0, + "reward_std": 0.8632582426071167, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24639697252508666, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.23364883652455762, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8250000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3876 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1379.9375, + "completions/mean_terminated_length": 1325.3636474609375, + "completions/min_length": 1109.0, + "completions/min_terminated_length": 1109.0, + "epoch": 0.9694923730932733, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.644507097699511, + "kl": 0.01318359375, + "learning_rate": 1.0259126332364673e-07, + "loss": -0.0132, + "num_tokens": 175849669.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8268013000488281, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.004305125792654907, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06837236922597321, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1112221667221529, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3877 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1337.0, + "completions/max_terminated_length": 1337.0, + "completions/mean_length": 1087.125, + "completions/mean_terminated_length": 1087.125, + "completions/min_length": 808.0, + "completions/min_terminated_length": 808.0, + "epoch": 0.9697424356089023, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.71922393434584, + "kl": 0.0186920166015625, + "learning_rate": 1.025493399054383e-07, + "loss": -0.0254, + "num_tokens": 175889743.0, + "reward": 0.0, + "reward_std": 0.7555013298988342, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.01537137112582272, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12000653334578276, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746356, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3878 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1451.5625, + "completions/mean_terminated_length": 1306.25, + "completions/min_length": 971.0, + "completions/min_terminated_length": 971.0, + "epoch": 0.9699924981245311, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.174551241399965, + "kl": 0.02532958984375, + "learning_rate": 1.0250775742975835e-07, + "loss": -0.0269, + "num_tokens": 175948048.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0660946369171143, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.005289129613116898, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14327375232433606, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3879 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1358.0, + "completions/mean_length": 1053.6875, + "completions/mean_terminated_length": 1023.9334106445312, + "completions/min_length": 479.0, + "completions/min_terminated_length": 479.0, + "epoch": 0.97024256064016, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.023517512305386, + "kl": 0.025177001953125, + "learning_rate": 1.0246651592829136e-07, + "loss": -0.0498, + "num_tokens": 175994707.0, + "reward": 3.725290298461914e-09, + "reward_std": 1.0673798322677612, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.019810015915399895, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07382602735560075, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13381856152046848, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3880 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1210.3125, + "completions/mean_terminated_length": 1168.9285888671875, + "completions/min_length": 955.0, + "completions/min_terminated_length": 955.0, + "epoch": 0.970492623155789, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2664265385679787, + "kl": 0.023651123046875, + "learning_rate": 1.0242561543246195e-07, + "loss": 0.0165, + "num_tokens": 176040344.0, + "reward": 0.0, + "reward_std": 0.3299294412136078, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0012688789398439377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04956407363873974, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11474609652039006, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3881 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1469.0, + "completions/mean_length": 1280.8125, + "completions/mean_terminated_length": 1266.2000732421875, + "completions/min_length": 978.0, + "completions/min_terminated_length": 978.0, + "epoch": 0.9707426856714179, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6392407216011855, + "kl": 0.0202178955078125, + "learning_rate": 1.0238505597343493e-07, + "loss": -0.0225, + "num_tokens": 176087101.0, + "reward": 0.0, + "reward_std": 0.4673255681991577, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04873803552689783, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06401726184134773, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1067187372905475, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3882 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1498.0, + "completions/mean_length": 1170.5625, + "completions/mean_terminated_length": 1094.5384521484375, + "completions/min_length": 831.0, + "completions/min_terminated_length": 831.0, + "epoch": 0.9709927481870467, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3667527639207306, + "kl": 0.022735595703125, + "learning_rate": 1.0234483758211529e-07, + "loss": -0.055, + "num_tokens": 176134694.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9342899322509766, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03631146985980684, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10980534221086807, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1192569587999888, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3883 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.75, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1398.0, + "completions/mean_length": 1457.75, + "completions/mean_terminated_length": 1331.0, + "completions/min_length": 1188.0, + "completions/min_terminated_length": 1188.0, + "epoch": 0.9712428107026757, + "frac_reward_zero_std": 0.0, + "grad_norm": 1.8770402285064913, + "kl": 0.011932373046875, + "learning_rate": 1.023049602891481e-07, + "loss": -0.0099, + "num_tokens": 176198162.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.010070562362671, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03951424819150897, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1371422794719138, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10532137766186216, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3884 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1204.625, + "completions/mean_terminated_length": 1184.933349609375, + "completions/min_length": 771.0, + "completions/min_terminated_length": 771.0, + "epoch": 0.9714928732183046, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1113103524902397, + "kl": 0.02191162109375, + "learning_rate": 1.0226542412491849e-07, + "loss": -0.0614, + "num_tokens": 176245644.0, + "reward": 0.0, + "reward_std": 0.690178394317627, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.057420920271104056, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04037752884891573, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242309, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3885 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1281.9375, + "completions/mean_terminated_length": 1231.615478515625, + "completions/min_length": 828.0, + "completions/min_terminated_length": 828.0, + "epoch": 0.9717429357339334, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0757421279887116, + "kl": 0.0184326171875, + "learning_rate": 1.0222622911955182e-07, + "loss": -0.0073, + "num_tokens": 176293595.0, + "reward": 0.0, + "reward_std": 0.7994263172149658, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.055746620254712484, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13322691537809372, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16865480854231357, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3886 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1405.0, + "completions/mean_length": 1208.625, + "completions/mean_terminated_length": 1167.0, + "completions/min_length": 827.0, + "completions/min_terminated_length": 827.0, + "epoch": 0.9719929982495624, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3309831034447397, + "kl": 0.022705078125, + "learning_rate": 1.0218737530291318e-07, + "loss": 0.0096, + "num_tokens": 176342133.0, + "reward": 0.0, + "reward_std": 1.020411491394043, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08753272219631311, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15286429066652735, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8916666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09999999999999999, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3887 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1495.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1112.9375, + "completions/mean_terminated_length": 1112.9375, + "completions/min_length": 652.0, + "completions/min_terminated_length": 652.0, + "epoch": 0.9722430607651913, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.493098815951685, + "kl": 0.01367950439453125, + "learning_rate": 1.0214886270460809e-07, + "loss": 0.0421, + "num_tokens": 176385076.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9894026517868042, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13463609675816632, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11263126617062526, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503964, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3888 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1265.3125, + "completions/mean_terminated_length": 1124.5, + "completions/min_length": 807.0, + "completions/min_terminated_length": 807.0, + "epoch": 0.9724931232808202, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.304530367896954, + "kl": 0.02154541015625, + "learning_rate": 1.021106913539818e-07, + "loss": -0.037, + "num_tokens": 176438065.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.963756799697876, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09937230267823957, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10424140129801353, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13045504405165223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3889 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1341.0, + "completions/mean_length": 1133.625, + "completions/mean_terminated_length": 913.7999877929688, + "completions/min_length": 361.0, + "completions/min_terminated_length": 361.0, + "epoch": 0.9727431857964491, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9592312692193503, + "kl": 0.020416259765625, + "learning_rate": 1.020728612801195e-07, + "loss": 0.0195, + "num_tokens": 176483595.0, + "reward": 0.0, + "reward_std": 0.7363954186439514, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.03743952560391356, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.047428472556705015, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3890 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1362.875, + "completions/mean_terminated_length": 1280.5999755859375, + "completions/min_length": 1098.0, + "completions/min_terminated_length": 1098.0, + "epoch": 0.972993248312078, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.829904244990861, + "kl": 0.01837158203125, + "learning_rate": 1.0203537251184666e-07, + "loss": 0.0107, + "num_tokens": 176538777.0, + "reward": 0.0, + "reward_std": 0.569575309753418, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08562947768020877, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12134077824394143, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11122216672215288, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3891 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1253.75, + "completions/mean_terminated_length": 1237.3333740234375, + "completions/min_length": 1076.0, + "completions/min_terminated_length": 1076.0, + "epoch": 0.9732433108277069, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0521060159053794, + "kl": 0.01934814453125, + "learning_rate": 1.019982250777283e-07, + "loss": 0.0036, + "num_tokens": 176579397.0, + "reward": 0.0, + "reward_std": 0.9606192708015442, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06228135702315619, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08259858851593686, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0697880388775209, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327954292297363, + "step": 3892 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1483.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1153.9375, + "completions/mean_terminated_length": 1153.9375, + "completions/min_length": 958.0, + "completions/min_terminated_length": 958.0, + "epoch": 0.9734933733433359, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.293536477618352, + "kl": 0.0160675048828125, + "learning_rate": 1.019614190060696e-07, + "loss": 0.0022, + "num_tokens": 176624772.0, + "reward": 0.0, + "reward_std": 1.0096250772476196, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07397656553240944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09589711995946042, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3893 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1300.625, + "completions/mean_terminated_length": 1272.1429443359375, + "completions/min_length": 739.0, + "completions/min_terminated_length": 739.0, + "epoch": 0.9737434358589647, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6806183543990074, + "kl": 0.014434814453125, + "learning_rate": 1.019249543249155e-07, + "loss": -0.0914, + "num_tokens": 176679326.0, + "reward": 0.0, + "reward_std": 0.733823299407959, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05071210337847256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08437910656067908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09388724521901161, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3894 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1310.0, + "completions/max_terminated_length": 1310.0, + "completions/mean_length": 975.5625, + "completions/mean_terminated_length": 975.5625, + "completions/min_length": 618.0, + "completions/min_terminated_length": 618.0, + "epoch": 0.9739934983745937, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.632963541302686, + "kl": 0.024566650390625, + "learning_rate": 1.0188883106205102e-07, + "loss": -0.0221, + "num_tokens": 176710711.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.3792394995689392, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.33861519264423723, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.272525362845712, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13214750456578048, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3895 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1403.0, + "completions/max_terminated_length": 1403.0, + "completions/mean_length": 1030.6875, + "completions/mean_terminated_length": 1030.6875, + "completions/min_length": 514.0, + "completions/min_terminated_length": 514.0, + "epoch": 0.9742435608902226, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.429785873589672, + "kl": 0.02130126953125, + "learning_rate": 1.0185304924500073e-07, + "loss": -0.0101, + "num_tokens": 176770330.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8222322463989258, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.007911890696959524, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04774870871218728, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3896 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1440.0, + "completions/mean_length": 1239.875, + "completions/mean_terminated_length": 1083.800048828125, + "completions/min_length": 803.0, + "completions/min_terminated_length": 803.0, + "epoch": 0.9744936234058514, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.740058917129061, + "kl": 0.02313232421875, + "learning_rate": 1.0181760890102926e-07, + "loss": -0.0051, + "num_tokens": 176826496.0, + "reward": 0.0, + "reward_std": 0.9510804414749146, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08565076979696169, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2081057005435589, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3897 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1300.0, + "completions/mean_terminated_length": 1271.4285888671875, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.9747436859214804, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.961348707330454, + "kl": 0.014312744140625, + "learning_rate": 1.0178251005714096e-07, + "loss": 0.0089, + "num_tokens": 176873952.0, + "reward": 0.0, + "reward_std": 0.9715489149093628, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04195865176733698, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0898821810716159, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08681611046941139, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3898 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1320.0, + "completions/mean_length": 1338.375, + "completions/mean_terminated_length": 1176.75, + "completions/min_length": 1042.0, + "completions/min_terminated_length": 1042.0, + "epoch": 0.9749937484371093, + "frac_reward_zero_std": 0.0, + "grad_norm": 810.4805791365015, + "kl": 1.45654296875, + "learning_rate": 1.0174775274008002e-07, + "loss": 0.06, + "num_tokens": 176930950.0, + "reward": 0.0, + "reward_std": 0.9081319570541382, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.006182657097943911, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08510732294122109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0894427190999916, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3899 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1442.0, + "completions/mean_length": 1133.8125, + "completions/mean_terminated_length": 1049.3077392578125, + "completions/min_length": 657.0, + "completions/min_terminated_length": 657.0, + "epoch": 0.9752438109527382, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1690646412497574, + "kl": 0.020904541015625, + "learning_rate": 1.0171333697633028e-07, + "loss": -0.0388, + "num_tokens": 176969523.0, + "reward": 0.0, + "reward_std": 0.961146354675293, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011555569452647466, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06215982483802868, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07888106377466154, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3900 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1241.0, + "completions/max_terminated_length": 1241.0, + "completions/mean_length": 922.75, + "completions/mean_terminated_length": 922.75, + "completions/min_length": 638.0, + "completions/min_terminated_length": 638.0, + "epoch": 0.9754938734683671, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8043319680674315, + "kl": 0.01239013671875, + "learning_rate": 1.0167926279211548e-07, + "loss": 0.0342, + "num_tokens": 177008967.0, + "reward": -3.725290298461914e-09, + "reward_std": 1.0465383529663086, + "rewards/wordcountpos_reward_GEOBench/mean": -3.725290298461914e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1287288332876621, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19364209641726302, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08850612031567838, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3901 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1472.0, + "completions/mean_length": 1312.3125, + "completions/mean_terminated_length": 1249.75, + "completions/min_length": 953.0, + "completions/min_terminated_length": 953.0, + "epoch": 0.975743935983996, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.231482765912088, + "kl": 0.02197265625, + "learning_rate": 1.0164553021339898e-07, + "loss": 0.0169, + "num_tokens": 177061948.0, + "reward": 0.0, + "reward_std": 0.7310250997543335, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.13039874317528669, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09100631111052566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10183501544346311, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3902 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1496.0, + "completions/mean_length": 1335.9375, + "completions/mean_terminated_length": 1312.5, + "completions/min_length": 1062.0, + "completions/min_terminated_length": 1062.0, + "epoch": 0.9759939984996249, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0077751542292734, + "kl": 0.0168914794921875, + "learning_rate": 1.0161213926588384e-07, + "loss": -0.0008, + "num_tokens": 177113843.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.6930589079856873, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.39400514388619995, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.36504229039610475, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875000000000001, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3903 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1046.375, + "completions/mean_terminated_length": 1046.375, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.9762440610152539, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0779499709135503, + "kl": 0.02362060546875, + "learning_rate": 1.01579089975013e-07, + "loss": 0.0024, + "num_tokens": 177156321.0, + "reward": 3.725290298461914e-08, + "reward_std": 1.0327515602111816, + "rewards/wordcountpos_reward_GEOBench/mean": 3.725290298461914e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.10582900183895377, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06257754137030211, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13743685418725538, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3904 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1481.0, + "completions/mean_length": 1229.875, + "completions/mean_terminated_length": 1139.8333740234375, + "completions/min_length": 588.0, + "completions/min_terminated_length": 588.0, + "epoch": 0.9764941235308827, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3534163937511, + "kl": 0.014923095703125, + "learning_rate": 1.0154638236596872e-07, + "loss": -0.0202, + "num_tokens": 177195359.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7182954549789429, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05314396641604576, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06168242569940338, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3905 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1119.4375, + "completions/mean_terminated_length": 1065.071533203125, + "completions/min_length": 622.0, + "completions/min_terminated_length": 622.0, + "epoch": 0.9767441860465116, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.378176391949303, + "kl": 0.02587890625, + "learning_rate": 1.015140164636733e-07, + "loss": -0.0143, + "num_tokens": 177245438.0, + "reward": 0.0, + "reward_std": 0.7871840000152588, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02622340650188712, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05859630707530174, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1704025734460517, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3906 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 967.0, + "completions/max_terminated_length": 967.0, + "completions/mean_length": 816.75, + "completions/mean_terminated_length": 816.75, + "completions/min_length": 635.0, + "completions/min_terminated_length": 635.0, + "epoch": 0.9769942485621406, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.096676679222653, + "kl": 0.027252197265625, + "learning_rate": 1.0148199229278835e-07, + "loss": 0.0137, + "num_tokens": 177275618.0, + "reward": 0.0, + "reward_std": 1.0212676525115967, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0025883314095814947, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06280674650803493, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16815997674172586, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3907 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1047.0, + "completions/max_terminated_length": 1047.0, + "completions/mean_length": 760.75, + "completions/mean_terminated_length": 760.75, + "completions/min_length": 509.0, + "completions/min_terminated_length": 509.0, + "epoch": 0.9772443110777694, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.304877592764383, + "kl": 0.0218505859375, + "learning_rate": 1.0145030987771531e-07, + "loss": 0.0102, + "num_tokens": 177319814.0, + "reward": 0.0, + "reward_std": 0.7123987078666687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06894062549448235, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10056618077809362, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16843506277010845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3908 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1446.0, + "completions/mean_length": 1203.0625, + "completions/mean_terminated_length": 1183.2667236328125, + "completions/min_length": 692.0, + "completions/min_terminated_length": 692.0, + "epoch": 0.9774943735933983, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2500934469989544, + "kl": 0.023223876953125, + "learning_rate": 1.0141896924259508e-07, + "loss": 0.0055, + "num_tokens": 177366871.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9392913579940796, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04890943974892095, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08040652318902691, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3909 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1233.5625, + "completions/mean_terminated_length": 1195.5, + "completions/min_length": 999.0, + "completions/min_terminated_length": 999.0, + "epoch": 0.9777444361090273, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.247292276049081, + "kl": 0.0181732177734375, + "learning_rate": 1.0138797041130815e-07, + "loss": -0.0373, + "num_tokens": 177409456.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.037282943725586, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0044076247560025286, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03781692989764732, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09953596037316068, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3910 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1475.0, + "completions/mean_length": 974.6875, + "completions/mean_terminated_length": 939.6666870117188, + "completions/min_length": 546.0, + "completions/min_terminated_length": 546.0, + "epoch": 0.9779944986246562, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0406733435891313, + "kl": 0.02197265625, + "learning_rate": 1.0135731340747466e-07, + "loss": -0.0297, + "num_tokens": 177448315.0, + "reward": 0.0, + "reward_std": 1.0252885818481445, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04815748442505738, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06503369313026648, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11979921473804347, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3911 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1495.0, + "completions/mean_length": 1326.625, + "completions/mean_terminated_length": 1268.8333740234375, + "completions/min_length": 1055.0, + "completions/min_terminated_length": 1055.0, + "epoch": 0.978244561140285, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1451333656954117, + "kl": 0.0217437744140625, + "learning_rate": 1.0132699825445419e-07, + "loss": 0.0106, + "num_tokens": 177503557.0, + "reward": 0.0, + "reward_std": 0.43984100222587585, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06596705272236648, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10416565392221984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15864005379054394, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3912 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1223.8125, + "completions/mean_terminated_length": 1184.357177734375, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.978494623655914, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1077612395406713, + "kl": 0.0182952880859375, + "learning_rate": 1.0129702497534595e-07, + "loss": -0.0028, + "num_tokens": 177557138.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.019202709197998, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.06870343880904069, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11708452141005082, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1308094458023239, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3913 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1423.0, + "completions/mean_length": 1030.5625, + "completions/mean_terminated_length": 922.2307739257812, + "completions/min_length": 539.0, + "completions/min_terminated_length": 539.0, + "epoch": 0.9787446861715429, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1522547598699457, + "kl": 0.0169525146484375, + "learning_rate": 1.0126739359298846e-07, + "loss": -0.1009, + "num_tokens": 177583571.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9850387573242188, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028868672041910667, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09270434469507918, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15939701191492708, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3914 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1055.5, + "completions/mean_terminated_length": 1025.86669921875, + "completions/min_length": 728.0, + "completions/min_terminated_length": 728.0, + "epoch": 0.9789947486871718, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.09120513437476, + "kl": 0.0178680419921875, + "learning_rate": 1.0123810412995998e-07, + "loss": -0.0243, + "num_tokens": 177633179.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9202115535736084, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08979963287675982, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19563052630612426, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12405196043952266, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3915 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1438.0, + "completions/mean_length": 1156.0625, + "completions/mean_terminated_length": 1133.1334228515625, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.9792448112028007, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3646118882836014, + "kl": 0.01605224609375, + "learning_rate": 1.0120915660857798e-07, + "loss": -0.018, + "num_tokens": 177686484.0, + "reward": 4.470348358154297e-08, + "reward_std": 0.9523289203643799, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011510533095254655, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09584852861490928, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563382, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3916 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1234.1875, + "completions/mean_terminated_length": 1172.84619140625, + "completions/min_length": 891.0, + "completions/min_terminated_length": 891.0, + "epoch": 0.9794948737184296, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7532235715787543, + "kl": 0.0167694091796875, + "learning_rate": 1.0118055105089965e-07, + "loss": 0.0117, + "num_tokens": 177738543.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.699673056602478, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.1876853053986462, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.25361052418153146, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563384, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3917 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1313.0, + "completions/mean_length": 1199.625, + "completions/mean_terminated_length": 1179.60009765625, + "completions/min_length": 991.0, + "completions/min_terminated_length": 991.0, + "epoch": 0.9797449362340586, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5073170730324432, + "kl": 0.01727294921875, + "learning_rate": 1.0115228747872133e-07, + "loss": -0.0527, + "num_tokens": 177781793.0, + "reward": 0.0, + "reward_std": 0.8943246603012085, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0647421800473968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08788561615540821, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07969850595746354, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3918 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1189.0, + "completions/max_terminated_length": 1189.0, + "completions/mean_length": 970.125, + "completions/mean_terminated_length": 970.125, + "completions/min_length": 809.0, + "completions/min_terminated_length": 809.0, + "epoch": 0.9799949987496874, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.705475245704261, + "kl": 0.023529052734375, + "learning_rate": 1.01124365913579e-07, + "loss": -0.0523, + "num_tokens": 177815219.0, + "reward": 0.0, + "reward_std": 1.0195950269699097, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.005704209200119437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022816836800477747, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.161245154965971, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3919 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1414.0, + "completions/mean_length": 1340.0625, + "completions/mean_terminated_length": 1244.0999755859375, + "completions/min_length": 887.0, + "completions/min_terminated_length": 887.0, + "epoch": 0.9802450612653163, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.885573922998708, + "kl": 0.02252197265625, + "learning_rate": 1.0109678637674796e-07, + "loss": 0.0004, + "num_tokens": 177870596.0, + "reward": 2.2351741790771484e-08, + "reward_std": 1.027890682220459, + "rewards/wordcountpos_reward_GEOBench/mean": 2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.018241741848914944, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06505858348749899, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1246476515504285, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3920 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1456.0, + "completions/max_terminated_length": 1456.0, + "completions/mean_length": 1086.0625, + "completions/mean_terminated_length": 1086.0625, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.9804951237809453, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.018607443632997, + "kl": 0.022918701171875, + "learning_rate": 1.0106954888924287e-07, + "loss": -0.0126, + "num_tokens": 177912997.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.562686562538147, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.028918511754694714, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07655238902335633, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.16815997674172586, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3921 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1303.0, + "completions/mean_length": 1157.625, + "completions/mean_terminated_length": 1108.71435546875, + "completions/min_length": 836.0, + "completions/min_terminated_length": 836.0, + "epoch": 0.9807451862965741, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3446026026185947, + "kl": 0.02215576171875, + "learning_rate": 1.0104265347181784e-07, + "loss": -0.0265, + "num_tokens": 177948215.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7066589593887329, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.00130526717457668, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.20083220032860008, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3922 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1049.0, + "completions/max_terminated_length": 1049.0, + "completions/mean_length": 854.25, + "completions/mean_terminated_length": 854.25, + "completions/min_length": 664.0, + "completions/min_terminated_length": 664.0, + "epoch": 0.980995248812203, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.8619271017102306, + "kl": 0.02362060546875, + "learning_rate": 1.0101610014496628e-07, + "loss": -0.0361, + "num_tokens": 177988339.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.771213710308075, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03496191870354769, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.077544296959387, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1354690069789096, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3923 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1422.0, + "completions/mean_length": 1220.25, + "completions/mean_terminated_length": 1052.4000244140625, + "completions/min_length": 478.0, + "completions/min_terminated_length": 478.0, + "epoch": 0.981245311327832, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.853527663611948, + "kl": 0.020233154296875, + "learning_rate": 1.0098988892892091e-07, + "loss": -0.0722, + "num_tokens": 178043807.0, + "reward": 0.0, + "reward_std": 0.9739243984222412, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0033231694842703833, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05776162155961207, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0850925422157591, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3924 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1229.0, + "completions/mean_length": 1106.1875, + "completions/mean_terminated_length": 1079.933349609375, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.9814953738434609, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8446813913288773, + "kl": 0.018096923828125, + "learning_rate": 1.0096401984365374e-07, + "loss": -0.0393, + "num_tokens": 178082178.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0524044036865234, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0954216880051482, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.057074870499977566, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.925, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10576003586036262, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3925 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1428.0, + "completions/mean_length": 1204.875, + "completions/mean_terminated_length": 1162.71435546875, + "completions/min_length": 896.0, + "completions/min_terminated_length": 896.0, + "epoch": 0.9817454363590897, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.829050039442462, + "kl": 0.02008056640625, + "learning_rate": 1.0093849290887632e-07, + "loss": -0.0325, + "num_tokens": 178127624.0, + "reward": 0.0, + "reward_std": 0.586344301700592, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06703588127012498, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10104303411842569, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08777074514725111, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3926 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1493.0, + "completions/max_terminated_length": 1493.0, + "completions/mean_length": 1211.5, + "completions/mean_terminated_length": 1211.5, + "completions/min_length": 1001.0, + "completions/min_terminated_length": 1001.0, + "epoch": 0.9819954988747187, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2479665183490587, + "kl": 0.019500732421875, + "learning_rate": 1.0091330814403923e-07, + "loss": -0.0327, + "num_tokens": 178175608.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5625860095024109, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010645701205285046, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14804134019659534, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12524050936172845, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3927 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 985.0, + "completions/max_terminated_length": 985.0, + "completions/mean_length": 728.25, + "completions/mean_terminated_length": 728.25, + "completions/min_length": 341.0, + "completions/min_terminated_length": 341.0, + "epoch": 0.9822455613903476, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.574417666537789, + "kl": 0.01953125, + "learning_rate": 1.0088846556833236e-07, + "loss": -0.094, + "num_tokens": 178213780.0, + "reward": 0.0, + "reward_std": 0.9879322052001953, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.040121984431537694, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0744481401739982, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3928 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1208.0, + "completions/max_terminated_length": 1208.0, + "completions/mean_length": 921.4375, + "completions/mean_terminated_length": 921.4375, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.9824956239059764, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.066732446889595, + "kl": 0.0149383544921875, + "learning_rate": 1.0086396520068503e-07, + "loss": -0.0081, + "num_tokens": 178254979.0, + "reward": 0.0, + "reward_std": 0.8647475242614746, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06365285312383395, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06918743291604286, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.07503085784948506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3929 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1462.0, + "completions/mean_length": 1285.5625, + "completions/mean_terminated_length": 1214.0833740234375, + "completions/min_length": 966.0, + "completions/min_terminated_length": 966.0, + "epoch": 0.9827456864216054, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.258743921439025, + "kl": 0.022186279296875, + "learning_rate": 1.0083980705976566e-07, + "loss": -0.0627, + "num_tokens": 178309684.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9962244033813477, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07527932785282243, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08447887503898498, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1258305739211792, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3930 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1290.25, + "completions/mean_terminated_length": 1260.2857666015625, + "completions/min_length": 855.0, + "completions/min_terminated_length": 855.0, + "epoch": 0.9829957489372343, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8485372546713656, + "kl": 0.017181396484375, + "learning_rate": 1.0081599116398198e-07, + "loss": 0.0294, + "num_tokens": 178356296.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4282299876213074, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.01943778576782879, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07255032160914898, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1270024788326182, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3931 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1336.0, + "completions/max_terminated_length": 1336.0, + "completions/mean_length": 1113.5, + "completions/mean_terminated_length": 1113.5, + "completions/min_length": 922.0, + "completions/min_terminated_length": 922.0, + "epoch": 0.9832458114528632, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.7184489016811497, + "kl": 0.026031494140625, + "learning_rate": 1.0079251753148091e-07, + "loss": -0.0397, + "num_tokens": 178396944.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9364862442016602, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013474963896509002, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09015373305877482, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06540472290116194, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3932 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1252.625, + "completions/mean_terminated_length": 1236.1334228515625, + "completions/min_length": 1059.0, + "completions/min_terminated_length": 1059.0, + "epoch": 0.9834958739684921, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5274493022133964, + "kl": 0.008697509765625, + "learning_rate": 1.0076938618014851e-07, + "loss": -0.0101, + "num_tokens": 178454474.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7097426652908325, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.022854279857273424, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2159425172501647, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0969917904124231, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3933 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1426.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 899.5, + "completions/mean_terminated_length": 899.5, + "completions/min_length": 674.0, + "completions/min_terminated_length": 674.0, + "epoch": 0.983745936484121, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5464340877482288, + "kl": 0.01499176025390625, + "learning_rate": 1.0074659712761015e-07, + "loss": -0.0863, + "num_tokens": 178496498.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0250438451766968, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02916086289583973, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07665022494823538, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10945995377982527, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3934 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1491.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1024.9375, + "completions/mean_terminated_length": 1024.9375, + "completions/min_length": 550.0, + "completions/min_terminated_length": 550.0, + "epoch": 0.98399599899975, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.662373401550716, + "kl": 0.019073486328125, + "learning_rate": 1.0072415039123037e-07, + "loss": -0.0381, + "num_tokens": 178550025.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0376999378204346, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010396685447096037, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.018610101600458145, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7083333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3935 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1374.0, + "completions/mean_length": 1314.4375, + "completions/mean_terminated_length": 1128.875, + "completions/min_length": 944.0, + "completions/min_terminated_length": 944.0, + "epoch": 0.9842460615153789, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.527504462227094, + "kl": 0.0135955810546875, + "learning_rate": 1.0070204598811277e-07, + "loss": 0.0141, + "num_tokens": 178598056.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.9703945517539978, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022184609307771098, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03570759846274135, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12412657816683506, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3936 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1335.0, + "completions/mean_length": 1312.75, + "completions/mean_terminated_length": 1125.5, + "completions/min_length": 865.0, + "completions/min_terminated_length": 865.0, + "epoch": 0.9844961240310077, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4068073753518657, + "kl": 0.0154571533203125, + "learning_rate": 1.006802839351002e-07, + "loss": -0.0175, + "num_tokens": 178652188.0, + "reward": -2.9802322387695312e-08, + "reward_std": 1.006266713142395, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1518512811516948, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09596137575367246, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10749676997731403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3937 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1002.0, + "completions/mean_length": 911.5625, + "completions/mean_terminated_length": 872.3333740234375, + "completions/min_length": 637.0, + "completions/min_terminated_length": 637.0, + "epoch": 0.9847461865466367, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.2459729989830657, + "kl": 0.00641632080078125, + "learning_rate": 1.0065886424877461e-07, + "loss": 0.0503, + "num_tokens": 178692165.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7329870462417603, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.04295087396899319, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06334574289224103, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6666666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10327955589886444, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3938 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1491.0, + "completions/mean_length": 1350.9375, + "completions/mean_terminated_length": 1316.5384521484375, + "completions/min_length": 964.0, + "completions/min_terminated_length": 964.0, + "epoch": 0.9849962490622656, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.602267702152474, + "kl": 0.016021728515625, + "learning_rate": 1.0063778694545713e-07, + "loss": -0.0017, + "num_tokens": 178738068.0, + "reward": -5.960464477539063e-08, + "reward_std": 0.4504616856575012, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03325109178119179, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.074117684916747, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0807373427759331, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3939 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1489.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1135.25, + "completions/mean_terminated_length": 1135.25, + "completions/min_length": 678.0, + "completions/min_terminated_length": 678.0, + "epoch": 0.9852463115778944, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6857528054832853, + "kl": 0.015472412109375, + "learning_rate": 1.006170520412079e-07, + "loss": -0.0299, + "num_tokens": 178773648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5818321704864502, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03482698490352605, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08069435688807419, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17841898254763516, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3940 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1379.0, + "completions/max_terminated_length": 1379.0, + "completions/mean_length": 1159.0, + "completions/mean_terminated_length": 1159.0, + "completions/min_length": 814.0, + "completions/min_terminated_length": 814.0, + "epoch": 0.9854963740935234, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.480572336729593, + "kl": 0.020050048828125, + "learning_rate": 1.0059665955182626e-07, + "loss": -0.0224, + "num_tokens": 178811960.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.037232518196106, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.05128980458169996, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17702387971271547, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3941 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1484.0, + "completions/mean_length": 1235.125, + "completions/mean_terminated_length": 1197.2857666015625, + "completions/min_length": 813.0, + "completions/min_terminated_length": 813.0, + "epoch": 0.9857464366091523, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5101864018122537, + "kl": 0.016143798828125, + "learning_rate": 1.0057660949285062e-07, + "loss": -0.0111, + "num_tokens": 178863114.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0032176971435547, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0516345591109968, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07320542826741229, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3942 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1365.0, + "completions/mean_length": 1079.625, + "completions/mean_terminated_length": 982.6154174804688, + "completions/min_length": 686.0, + "completions/min_terminated_length": 686.0, + "epoch": 0.9859964991247812, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.6630103604361333, + "kl": 0.0161590576171875, + "learning_rate": 1.0055690187955842e-07, + "loss": -0.0364, + "num_tokens": 178900500.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9580137729644775, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.026437605561967642, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.027756350499373108, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8375, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14599594109020572, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3943 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1411.1875, + "completions/mean_terminated_length": 1357.9000244140625, + "completions/min_length": 1147.0, + "completions/min_terminated_length": 1147.0, + "epoch": 0.9862465616404101, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.350104946914508, + "kl": 0.022003173828125, + "learning_rate": 1.005375367269663e-07, + "loss": -0.0139, + "num_tokens": 178952903.0, + "reward": 0.0, + "reward_std": 0.6357720494270325, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.22601717355767217, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.36141316395056305, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133131, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3944 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1426.0, + "completions/mean_length": 1333.5625, + "completions/mean_terminated_length": 1204.111083984375, + "completions/min_length": 976.0, + "completions/min_terminated_length": 976.0, + "epoch": 0.986496624156039, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.016547294945718, + "kl": 0.0211029052734375, + "learning_rate": 1.0051851404982976e-07, + "loss": -0.0168, + "num_tokens": 179014416.0, + "reward": 7.450580596923828e-09, + "reward_std": 0.9378402829170227, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11482622445412956, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08518406362970456, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11642832797715322, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3945 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1355.0, + "completions/mean_length": 1382.4375, + "completions/mean_terminated_length": 1123.800048828125, + "completions/min_length": 557.0, + "completions/min_terminated_length": 557.0, + "epoch": 0.9867466866716679, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5764989593707517, + "kl": 0.020233154296875, + "learning_rate": 1.0049983386264346e-07, + "loss": -0.013, + "num_tokens": 179075471.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9455363750457764, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0545115765259603, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.06346316695431908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8666666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590963, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3946 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1435.0, + "completions/mean_length": 1231.5625, + "completions/mean_terminated_length": 1142.0833740234375, + "completions/min_length": 894.0, + "completions/min_terminated_length": 894.0, + "epoch": 0.9869967491872969, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.287161197271656, + "kl": 0.02117919921875, + "learning_rate": 1.0048149617964114e-07, + "loss": 0.0081, + "num_tokens": 179124680.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9557429552078247, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.031113227235765272, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07740132193652098, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.14168300559373406, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3947 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1445.0, + "completions/mean_length": 1144.625, + "completions/mean_terminated_length": 1093.857177734375, + "completions/min_length": 751.0, + "completions/min_terminated_length": 751.0, + "epoch": 0.9872468117029257, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.355345879713742, + "kl": 0.016693115234375, + "learning_rate": 1.0046350101479544e-07, + "loss": -0.0337, + "num_tokens": 179164578.0, + "reward": 0.0, + "reward_std": 0.8170695304870605, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.21716767077456256, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2507742539264809, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08062257748298553, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3948 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1331.0, + "completions/mean_length": 1208.9375, + "completions/mean_terminated_length": 1167.357177734375, + "completions/min_length": 931.0, + "completions/min_terminated_length": 931.0, + "epoch": 0.9874968742185546, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.983735698808071, + "kl": 0.0185089111328125, + "learning_rate": 1.0044584838181812e-07, + "loss": -0.0227, + "num_tokens": 179205609.0, + "reward": 0.0, + "reward_std": 1.0679681301116943, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.17822834926853626, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09231680643731743, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8458333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10246950765959599, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3949 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 1126.6875, + "completions/mean_terminated_length": 1073.357177734375, + "completions/min_length": 679.0, + "completions/min_terminated_length": 679.0, + "epoch": 0.9877469367341836, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2392105414895407, + "kl": 0.012847900390625, + "learning_rate": 1.0042853829415986e-07, + "loss": 0.0095, + "num_tokens": 179262404.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.4921036958694458, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.20241477369747887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.24232672791023802, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7041666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3950 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1429.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 956.4375, + "completions/mean_terminated_length": 956.4375, + "completions/min_length": 626.0, + "completions/min_terminated_length": 626.0, + "epoch": 0.9879969992498124, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6166869867732587, + "kl": 0.0222930908203125, + "learning_rate": 1.0041157076501041e-07, + "loss": -0.0374, + "num_tokens": 179291163.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.7420085072517395, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.02855816438080931, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.03848484356230258, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13526380260918403, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3951 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1429.0, + "completions/mean_length": 1180.625, + "completions/mean_terminated_length": 1159.3333740234375, + "completions/min_length": 758.0, + "completions/min_terminated_length": 758.0, + "epoch": 0.9882470617654414, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2668434807772786, + "kl": 0.020599365234375, + "learning_rate": 1.0039494580729847e-07, + "loss": -0.0246, + "num_tokens": 179343453.0, + "reward": 0.0, + "reward_std": 0.903883695602417, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03588713556643828, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11178387799759107, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06885303726590966, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3952 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1488.0, + "completions/mean_length": 1390.5625, + "completions/mean_terminated_length": 1340.8182373046875, + "completions/min_length": 1158.0, + "completions/min_terminated_length": 1158.0, + "epoch": 0.9884971242810703, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3860615305952013, + "kl": 0.0174713134765625, + "learning_rate": 1.0037866343369167e-07, + "loss": 0.0105, + "num_tokens": 179394430.0, + "reward": 0.0, + "reward_std": 0.9476567506790161, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10231040723591475, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07002285497268322, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.06831300510639733, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3953 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1368.0, + "completions/mean_length": 1262.375, + "completions/mean_terminated_length": 1207.5384521484375, + "completions/min_length": 906.0, + "completions/min_terminated_length": 906.0, + "epoch": 0.9887471867966992, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1630271658614864, + "kl": 0.022247314453125, + "learning_rate": 1.0036272365659667e-07, + "loss": 0.037, + "num_tokens": 179441892.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9629299640655518, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05823961387664913, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13939438492980002, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0838870492807861, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3954 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1443.0, + "completions/mean_length": 1418.1875, + "completions/mean_terminated_length": 1313.0, + "completions/min_length": 883.0, + "completions/min_terminated_length": 883.0, + "epoch": 0.9889972493123281, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.498970149473122, + "kl": 0.016448974609375, + "learning_rate": 1.0034712648815902e-07, + "loss": 0.0106, + "num_tokens": 179494703.0, + "reward": 0.0, + "reward_std": 0.9592735767364502, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.07551116976465634, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.13984822691663087, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11013459778666118, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3955 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1277.8125, + "completions/mean_terminated_length": 1246.071533203125, + "completions/min_length": 903.0, + "completions/min_terminated_length": 903.0, + "epoch": 0.989247311827957, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.119212633571498, + "kl": 0.018218994140625, + "learning_rate": 1.0033187194026327e-07, + "loss": 0.0286, + "num_tokens": 179551676.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9570690989494324, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.021716993246855006, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.039250825246527433, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.725, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066223, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3956 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1477.0, + "completions/max_terminated_length": 1477.0, + "completions/mean_length": 1250.0625, + "completions/mean_terminated_length": 1250.0625, + "completions/min_length": 1032.0, + "completions/min_terminated_length": 1032.0, + "epoch": 0.9894973743435859, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0846395022712496, + "kl": 0.01861572265625, + "learning_rate": 1.0031696002453284e-07, + "loss": -0.0104, + "num_tokens": 179591101.0, + "reward": 0.0, + "reward_std": 0.864221453666687, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.011400640239627266, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04171197728977003, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13270686158262923, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3957 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1219.0, + "completions/max_terminated_length": 1219.0, + "completions/mean_length": 929.0, + "completions/mean_terminated_length": 929.0, + "completions/min_length": 592.0, + "completions/min_terminated_length": 592.0, + "epoch": 0.9897474368592148, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1647115798844823, + "kl": 0.01751708984375, + "learning_rate": 1.0030239075233017e-07, + "loss": -0.0458, + "num_tokens": 179628381.0, + "reward": 0.0, + "reward_std": 0.6897619366645813, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09765300302164437, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10160208041495984, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8583333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3958 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1218.0, + "completions/max_terminated_length": 1218.0, + "completions/mean_length": 1014.5625, + "completions/mean_terminated_length": 1014.5625, + "completions/min_length": 747.0, + "completions/min_terminated_length": 747.0, + "epoch": 0.9899974993748437, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.259904736816521, + "kl": 0.0148773193359375, + "learning_rate": 1.0028816413475651e-07, + "loss": -0.036, + "num_tokens": 179667302.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0181900262832642, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.008628682574770325, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08982510999227344, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8333333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09428090415820635, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3959 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1489.0, + "completions/mean_length": 1145.5625, + "completions/mean_terminated_length": 1121.933349609375, + "completions/min_length": 744.0, + "completions/min_terminated_length": 744.0, + "epoch": 0.9902475618904726, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2284861550790094, + "kl": 0.020660400390625, + "learning_rate": 1.0027428018265208e-07, + "loss": 0.0067, + "num_tokens": 179712431.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0276758670806885, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.1458582574398592, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.3682640499718939, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.825, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.072520750542581, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3960 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1397.0, + "completions/mean_length": 1427.9375, + "completions/mean_terminated_length": 1269.4000244140625, + "completions/min_length": 1215.0, + "completions/min_terminated_length": 1215.0, + "epoch": 0.9904976244061016, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3275519628007744, + "kl": 0.0156097412109375, + "learning_rate": 1.0026073890659602e-07, + "loss": -0.012, + "num_tokens": 179761702.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.6388014554977417, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03298127935253952, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09632260855415314, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13662601021279464, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3961 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1386.0, + "completions/max_terminated_length": 1386.0, + "completions/mean_length": 994.125, + "completions/mean_terminated_length": 994.125, + "completions/min_length": 755.0, + "completions/min_terminated_length": 755.0, + "epoch": 0.9907476869217304, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5681764926749446, + "kl": 0.021942138671875, + "learning_rate": 1.0024754031690634e-07, + "loss": 0.0365, + "num_tokens": 179792704.0, + "reward": 0.0, + "reward_std": 0.8944810628890991, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.024457559181082222, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0735362908558149, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10470416879457554, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3962 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1281.0, + "completions/max_terminated_length": 1281.0, + "completions/mean_length": 962.375, + "completions/mean_terminated_length": 962.375, + "completions/min_length": 645.0, + "completions/min_terminated_length": 645.0, + "epoch": 0.9909977494373593, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.6232616793050623, + "kl": 0.025848388671875, + "learning_rate": 1.0023468442363985e-07, + "loss": -0.0257, + "num_tokens": 179836686.0, + "reward": 1.4901161193847656e-08, + "reward_std": 1.0426034927368164, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03266012825500595, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05649990901365066, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12292725943057184, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3963 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1468.0, + "completions/mean_length": 1363.375, + "completions/mean_terminated_length": 1257.111083984375, + "completions/min_length": 939.0, + "completions/min_terminated_length": 939.0, + "epoch": 0.9912478119529883, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.875264797277503, + "kl": 0.0153961181640625, + "learning_rate": 1.002221712365924e-07, + "loss": 0.0136, + "num_tokens": 179894396.0, + "reward": 0.0, + "reward_std": 0.7892028093338013, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.03397211536129042, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05500592492677391, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7208333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.15244914148902494, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3964 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1288.0, + "completions/max_terminated_length": 1288.0, + "completions/mean_length": 1045.6875, + "completions/mean_terminated_length": 1045.6875, + "completions/min_length": 611.0, + "completions/min_terminated_length": 611.0, + "epoch": 0.9914978744686171, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.7732080268969677, + "kl": 0.02130126953125, + "learning_rate": 1.0021000076529855e-07, + "loss": -0.0116, + "num_tokens": 179942311.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9989646673202515, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.07229265513543608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.08373141575858474, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6625, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10741060020797315, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3965 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1410.0, + "completions/mean_length": 1315.0625, + "completions/mean_terminated_length": 1253.416748046875, + "completions/min_length": 954.0, + "completions/min_terminated_length": 954.0, + "epoch": 0.991747936984246, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.3938582232819345, + "kl": 0.024658203125, + "learning_rate": 1.0019817301903185e-07, + "loss": -0.0092, + "num_tokens": 179993072.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8671700954437256, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08776742440075179, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14652966771243212, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.13158576980363348, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3966 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1217.625, + "completions/mean_terminated_length": 1123.5, + "completions/min_length": 910.0, + "completions/min_terminated_length": 910.0, + "epoch": 0.991997999499875, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2512048325387957, + "kl": 0.021392822265625, + "learning_rate": 1.0018668800680464e-07, + "loss": -0.0509, + "num_tokens": 180039786.0, + "reward": 5.960464477539063e-08, + "reward_std": 0.9283808469772339, + "rewards/wordcountpos_reward_GEOBench/mean": 5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.029453603904571673, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19378349040828455, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8708333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0787635937708768, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3967 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1460.0, + "completions/max_terminated_length": 1460.0, + "completions/mean_length": 1125.1875, + "completions/mean_terminated_length": 1125.1875, + "completions/min_length": 919.0, + "completions/min_terminated_length": 919.0, + "epoch": 0.9922480620155039, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.4735782562333686, + "kl": 0.02313232421875, + "learning_rate": 1.001755457373681e-07, + "loss": -0.0382, + "num_tokens": 180089965.0, + "reward": -5.960464477539063e-08, + "reward_std": 1.0192793607711792, + "rewards/wordcountpos_reward_GEOBench/mean": -5.960464477539063e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.06527859465367418, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10564083574728779, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7916666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08734775114237134, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3968 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1184.6875, + "completions/mean_terminated_length": 1079.5833740234375, + "completions/min_length": 757.0, + "completions/min_terminated_length": 757.0, + "epoch": 0.9924981245311327, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1748108674951276, + "kl": 0.021942138671875, + "learning_rate": 1.0016474621921225e-07, + "loss": -0.0129, + "num_tokens": 180126648.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.9770182967185974, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.010372688950153593, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.02851109785415099, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8541666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1338185615204685, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3969 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1395.8125, + "completions/mean_terminated_length": 1261.857177734375, + "completions/min_length": 1039.0, + "completions/min_terminated_length": 1039.0, + "epoch": 0.9927481870467617, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9379109008921516, + "kl": 0.0247955322265625, + "learning_rate": 1.0015428946056603e-07, + "loss": -0.0077, + "num_tokens": 180173965.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.8798666000366211, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.11493746086358148, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12187840341303109, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1796601730428249, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3970 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.6875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1418.6875, + "completions/mean_terminated_length": 1239.800048828125, + "completions/min_length": 1044.0, + "completions/min_terminated_length": 1044.0, + "epoch": 0.9929982495623906, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1435441366773866, + "kl": 0.012969970703125, + "learning_rate": 1.0014417546939706e-07, + "loss": -0.0275, + "num_tokens": 180237104.0, + "reward": -1.862645149230957e-08, + "reward_std": 1.0231328010559082, + "rewards/wordcountpos_reward_GEOBench/mean": -1.862645149230957e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04374894296467447, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.15139558441357387, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11855612829185828, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3971 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 1268.625, + "completions/mean_terminated_length": 1037.25, + "completions/min_length": 951.0, + "completions/min_terminated_length": 951.0, + "epoch": 0.9932483120780196, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.291798424234861, + "kl": 0.013458251953125, + "learning_rate": 1.0013440425341193e-07, + "loss": 0.015, + "num_tokens": 180288330.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0456284284591675, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.08407623904729404, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11867820622251553, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.75, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.4472135954999579, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10852547064066473, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3972 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1473.0, + "completions/mean_length": 1245.0, + "completions/mean_terminated_length": 1228.0001220703125, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.9934983745936484, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.111365565696447, + "kl": 0.01898193359375, + "learning_rate": 1.0012497582005597e-07, + "loss": 0.0203, + "num_tokens": 180338562.0, + "reward": 0.0, + "reward_std": 0.8096473217010498, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.19663672420140899, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14179024384663066, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5163977794943223, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1299572579307862, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3973 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1486.0, + "completions/mean_length": 967.625, + "completions/mean_terminated_length": 891.5714721679688, + "completions/min_length": 410.0, + "completions/min_terminated_length": 410.0, + "epoch": 0.9937484371092773, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.5910207608377127, + "kl": 0.026275634765625, + "learning_rate": 1.0011589017651331e-07, + "loss": -0.0238, + "num_tokens": 180382308.0, + "reward": 4.470348358154297e-08, + "reward_std": 1.0165892839431763, + "rewards/wordcountpos_reward_GEOBench/mean": 4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.027864014513737358, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07305771281777634, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7166666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11800816042090449, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3974 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1387.0, + "completions/mean_length": 1271.9375, + "completions/mean_terminated_length": 1168.272705078125, + "completions/min_length": 1015.0, + "completions/min_terminated_length": 1015.0, + "epoch": 0.9939984996249063, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1178384671646238, + "kl": 0.016998291015625, + "learning_rate": 1.0010714732970693e-07, + "loss": 0.0293, + "num_tokens": 180437731.0, + "reward": 0.0, + "reward_std": 1.0547215938568115, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04320413220124633, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07347832479248506, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7583333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11385500851066221, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3975 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1480.0, + "completions/mean_length": 1265.875, + "completions/mean_terminated_length": 1232.4285888671875, + "completions/min_length": 992.0, + "completions/min_terminated_length": 992.0, + "epoch": 0.9942485621405351, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.075524335856864, + "kl": 0.0167236328125, + "learning_rate": 1.0009874728629858e-07, + "loss": -0.0157, + "num_tokens": 180487553.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5505814552307129, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.019607212520947773, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10769178672485429, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12852438880818895, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3976 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1325.0, + "completions/max_terminated_length": 1325.0, + "completions/mean_length": 1046.5, + "completions/mean_terminated_length": 1046.5, + "completions/min_length": 774.0, + "completions/min_terminated_length": 774.0, + "epoch": 0.994498624656164, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.421172850543132, + "kl": 0.0195770263671875, + "learning_rate": 1.0009069005268878e-07, + "loss": -0.0179, + "num_tokens": 180527745.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.7750729322433472, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.04286758693790071, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.19056814449500042, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1517795672580372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3977 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1406.0, + "completions/mean_length": 1115.5, + "completions/mean_terminated_length": 1089.86669921875, + "completions/min_length": 826.0, + "completions/min_terminated_length": 826.0, + "epoch": 0.994748687171793, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3363476280897713, + "kl": 0.017425537109375, + "learning_rate": 1.0008297563501692e-07, + "loss": 0.0093, + "num_tokens": 180575241.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.0324265956878662, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0981086499673104, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.17151666982673408, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1253144193766372, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3978 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.25, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1454.0, + "completions/mean_length": 1361.25, + "completions/mean_terminated_length": 1315.0, + "completions/min_length": 1052.0, + "completions/min_terminated_length": 1052.0, + "epoch": 0.9949987496874219, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.384874232229979, + "kl": 0.024017333984375, + "learning_rate": 1.0007560403916112e-07, + "loss": 0.0148, + "num_tokens": 180629981.0, + "reward": 0.0, + "reward_std": 0.6256964802742004, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.0458128174539539, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2610883965168908, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8958333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09727776191382573, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3979 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1437.0, + "completions/mean_length": 1265.1875, + "completions/mean_terminated_length": 1249.533447265625, + "completions/min_length": 989.0, + "completions/min_terminated_length": 989.0, + "epoch": 0.9952488122030507, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2166700112748416, + "kl": 0.01873779296875, + "learning_rate": 1.000685752707383e-07, + "loss": -0.0045, + "num_tokens": 180684280.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0454866886138916, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.24286096887777853, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.2830881843102866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.625, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.5, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09574271077563383, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3980 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1458.0, + "completions/mean_length": 1165.875, + "completions/mean_terminated_length": 1088.769287109375, + "completions/min_length": 708.0, + "completions/min_terminated_length": 708.0, + "epoch": 0.9954988747186797, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.964119326070864, + "kl": 0.0252685546875, + "learning_rate": 1.0006188933510416e-07, + "loss": 0.0042, + "num_tokens": 180738918.0, + "reward": -2.2351741790771484e-08, + "reward_std": 1.0369139909744263, + "rewards/wordcountpos_reward_GEOBench/mean": -2.2351741790771484e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09030605269723878, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.14167467602244369, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8875, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09016445879408155, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3981 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1179.0, + "completions/max_terminated_length": 1179.0, + "completions/mean_length": 954.8125, + "completions/mean_terminated_length": 954.8125, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.9957489372343086, + "frac_reward_zero_std": 0.0, + "grad_norm": 4.019354005952622, + "kl": 0.027984619140625, + "learning_rate": 1.0005554623735306e-07, + "loss": 0.0155, + "num_tokens": 180765331.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.8268804550170898, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.050755239780749965, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12829604318754864, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.12141145226353545, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3982 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1424.0, + "completions/mean_length": 1355.0, + "completions/mean_terminated_length": 1268.0, + "completions/min_length": 1061.0, + "completions/min_terminated_length": 1061.0, + "epoch": 0.9959989997499374, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.0007146402725073, + "kl": 0.024017333984375, + "learning_rate": 1.0004954598231838e-07, + "loss": -0.0377, + "num_tokens": 180804083.0, + "reward": -7.450580596923828e-09, + "reward_std": 1.0212193727493286, + "rewards/wordcountpos_reward_GEOBench/mean": -7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.020239179039381276, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04005134952160716, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08073734277593314, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3983 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1245.3125, + "completions/mean_terminated_length": 1129.5455322265625, + "completions/min_length": 731.0, + "completions/min_terminated_length": 731.0, + "epoch": 0.9962490622655664, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.1253960307792954, + "kl": 0.0107574462890625, + "learning_rate": 1.0004388857457201e-07, + "loss": -0.0106, + "num_tokens": 180847312.0, + "reward": 0.0, + "reward_std": 0.7552182674407959, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09537182450997192, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.10305160293008125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08606629658238707, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3984 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1463.0, + "completions/mean_length": 1254.875, + "completions/mean_terminated_length": 1219.857177734375, + "completions/min_length": 793.0, + "completions/min_terminated_length": 793.0, + "epoch": 0.9964991247811953, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.315417886338798, + "kl": 0.02362060546875, + "learning_rate": 1.0003857401842474e-07, + "loss": 0.0133, + "num_tokens": 180883966.0, + "reward": -4.470348358154297e-08, + "reward_std": 1.0272789001464844, + "rewards/wordcountpos_reward_GEOBench/mean": -4.470348358154297e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.045926245710087174, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05258115638675733, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8791666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10101338378503961, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327956676483154, + "step": 3985 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1143.0, + "completions/max_terminated_length": 1143.0, + "completions/mean_length": 971.3125, + "completions/mean_terminated_length": 971.3125, + "completions/min_length": 881.0, + "completions/min_terminated_length": 881.0, + "epoch": 0.9967491872968242, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.8310074488801837, + "kl": 0.01033782958984375, + "learning_rate": 1.0003360231792605e-07, + "loss": 0.014, + "num_tokens": 180926283.0, + "reward": -1.4901161193847656e-08, + "reward_std": 1.051760196685791, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.08060343160294882, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11882032324966425, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8416666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09699179041242312, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3986 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.4375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1465.0, + "completions/mean_length": 1392.6875, + "completions/mean_terminated_length": 1309.2222900390625, + "completions/min_length": 1186.0, + "completions/min_terminated_length": 1186.0, + "epoch": 0.9969992498124531, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.751618735478377, + "kl": 0.0179443359375, + "learning_rate": 1.000289734768643e-07, + "loss": 0.0194, + "num_tokens": 180972910.0, + "reward": 0.0, + "reward_std": 0.8390613198280334, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.009561165784940887, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.11240097644353902, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09888264649460887, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3987 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1370.0, + "completions/mean_length": 1107.75, + "completions/mean_terminated_length": 1081.60009765625, + "completions/min_length": 872.0, + "completions/min_terminated_length": 872.0, + "epoch": 0.997249312328082, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.380910563393486, + "kl": 0.0203857421875, + "learning_rate": 1.0002468749876643e-07, + "loss": -0.0288, + "num_tokens": 181013938.0, + "reward": 0.0, + "reward_std": 0.9886282086372375, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008261182640902177, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0254469425821356, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 1.0, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9208333333333334, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08153617692869924, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3988 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1304.0, + "completions/mean_length": 1302.3125, + "completions/mean_terminated_length": 1104.625, + "completions/min_length": 959.0, + "completions/min_terminated_length": 959.0, + "epoch": 0.9974993748437109, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.881823616886722, + "kl": 0.0156402587890625, + "learning_rate": 1.000207443868982e-07, + "loss": 0.0144, + "num_tokens": 181075039.0, + "reward": 0.0, + "reward_std": 1.0567153692245483, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.002197586773622612, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.0886681918609611, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7291666666666666, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08243965245133132, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3989 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1452.0, + "completions/max_terminated_length": 1452.0, + "completions/mean_length": 1118.375, + "completions/mean_terminated_length": 1118.375, + "completions/min_length": 833.0, + "completions/min_terminated_length": 833.0, + "epoch": 0.9977494373593399, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.030360345804693, + "kl": 0.01287841796875, + "learning_rate": 1.0001714414426424e-07, + "loss": -0.0299, + "num_tokens": 181111101.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9768827557563782, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.05840641546731307, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04533106268231757, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7541666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.08333333333333336, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3990 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0625, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1483.0, + "completions/mean_length": 1180.0625, + "completions/mean_terminated_length": 1158.7333984375, + "completions/min_length": 972.0, + "completions/min_terminated_length": 972.0, + "epoch": 0.9979994998749687, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.2351531437670817, + "kl": 0.020416259765625, + "learning_rate": 1.0001388677360768e-07, + "loss": 0.0259, + "num_tokens": 181165190.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.8557232022285461, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.013318237715935155, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.09278237245253866, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.8125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.775, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.10292032157252812, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3991 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1479.0, + "completions/mean_length": 1271.0625, + "completions/mean_terminated_length": 1218.2308349609375, + "completions/min_length": 977.0, + "completions/min_terminated_length": 977.0, + "epoch": 0.9982495623905977, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.9333551594899254, + "kl": 0.021728515625, + "learning_rate": 1.0001097227741065e-07, + "loss": 0.0165, + "num_tokens": 181212407.0, + "reward": 0.0, + "reward_std": 0.973629355430603, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09168699298201799, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1439286760291289, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.9375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.25, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.8041666666666667, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.1275843947266976, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3992 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1492.0, + "completions/mean_length": 1240.9375, + "completions/mean_terminated_length": 1123.181884765625, + "completions/min_length": 691.0, + "completions/min_terminated_length": 691.0, + "epoch": 0.9984996249062266, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.161811534378537, + "kl": 0.021209716796875, + "learning_rate": 1.0000840065789381e-07, + "loss": 0.0025, + "num_tokens": 181271326.0, + "reward": 2.9802322387695312e-08, + "reward_std": 1.066611647605896, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.10863287224110608, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.12726421517708414, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.3125, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.85, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11547005383792518, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3993 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.0, + "completions/max_length": 1470.0, + "completions/max_terminated_length": 1470.0, + "completions/mean_length": 1176.875, + "completions/mean_terminated_length": 1176.875, + "completions/min_length": 1058.0, + "completions/min_terminated_length": 1058.0, + "epoch": 0.9987496874218554, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.5693423102994792, + "kl": 0.0196685791015625, + "learning_rate": 1.0000617191701666e-07, + "loss": -0.0127, + "num_tokens": 181306996.0, + "reward": -2.9802322387695312e-08, + "reward_std": 0.5934744477272034, + "rewards/wordcountpos_reward_GEOBench/mean": -2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.022205711600362478, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.1160896826076282, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09737289911202954, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3994 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.9375, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1485.0, + "completions/mean_length": 1499.0625, + "completions/mean_terminated_length": 1485.0, + "completions/min_length": 1485.0, + "completions/min_terminated_length": 1485.0, + "epoch": 0.9989997499374844, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.340019691532815, + "kl": 0.0138092041015625, + "learning_rate": 1.0000428605647749e-07, + "loss": 0.0004, + "num_tokens": 181360365.0, + "reward": 1.4901161193847656e-08, + "reward_std": 0.9886115789413452, + "rewards/wordcountpos_reward_GEOBench/mean": 1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.11857893514146273, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.07431133461999091, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.9083333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.0683130051063973, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3995 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.5, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1028.0, + "completions/mean_length": 1162.4375, + "completions/mean_terminated_length": 824.875, + "completions/min_length": 634.0, + "completions/min_terminated_length": 634.0, + "epoch": 0.9992498124531133, + "frac_reward_zero_std": 0.0, + "grad_norm": 2.4313826536707865, + "kl": 0.01611328125, + "learning_rate": 1.0000274307771317e-07, + "loss": -0.0149, + "num_tokens": 181410388.0, + "reward": 7.450580596923828e-09, + "reward_std": 1.0077182054519653, + "rewards/wordcountpos_reward_GEOBench/mean": 7.450580596923828e-09, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": -0.0018127480899167382, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.05796600077032136, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.6875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.47871355387816905, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.6958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09418264367902598, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3996 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.3125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1497.0, + "completions/mean_length": 1202.875, + "completions/mean_terminated_length": 1067.8182373046875, + "completions/min_length": 847.0, + "completions/min_terminated_length": 847.0, + "epoch": 0.9994998749687422, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.1987226630803, + "kl": 0.026031494140625, + "learning_rate": 1.000015429818995e-07, + "loss": -0.0358, + "num_tokens": 181463954.0, + "reward": 0.0, + "reward_std": 0.8488315343856812, + "rewards/wordcountpos_reward_GEOBench/mean": 0.0, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.031623638557441616, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.04832296437919263, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.1875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.40311288741492746, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7958333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.11538983843829066, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3997 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.1875, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1232.0, + "completions/mean_length": 998.375, + "completions/mean_terminated_length": 882.6154174804688, + "completions/min_length": 543.0, + "completions/min_terminated_length": 543.0, + "epoch": 0.9997499374843711, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.3640863387615263, + "kl": 0.02490234375, + "learning_rate": 1.0000068576995082e-07, + "loss": -0.0088, + "num_tokens": 181510616.0, + "reward": 2.9802322387695312e-08, + "reward_std": 0.5023515224456787, + "rewards/wordcountpos_reward_GEOBench/mean": 2.9802322387695312e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.008049354414598782, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.022027433677209027, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.4375, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.51234753829798, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7833333333333333, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.17469550228474265, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3998 + }, + { + "clip_ratio/high_max": 0.0, + "clip_ratio/high_mean": 0.0, + "clip_ratio/low_mean": 0.0, + "clip_ratio/low_min": 0.0, + "clip_ratio/region_mean": 0.0, + "completions/clipped_ratio": 0.125, + "completions/max_length": 1500.0, + "completions/max_terminated_length": 1499.0, + "completions/mean_length": 1186.3125, + "completions/mean_terminated_length": 1141.5, + "completions/min_length": 790.0, + "completions/min_terminated_length": 790.0, + "epoch": 1.0, + "frac_reward_zero_std": 0.0, + "grad_norm": 3.027076048851065, + "kl": 0.0157318115234375, + "learning_rate": 1.0000017144252038e-07, + "loss": -0.0039, + "num_tokens": 181557285.0, + "reward": -1.4901161193847656e-08, + "reward_std": 0.9141362905502319, + "rewards/wordcountpos_reward_GEOBench/mean": -1.4901161193847656e-08, + "rewards/wordcountpos_reward_GEOBench/raw_geo/mean": 0.09663224001171876, + "rewards/wordcountpos_reward_GEOBench/raw_geo/std": 0.059601732315270106, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/mean": 0.875, + "rewards/wordcountpos_reward_GEOBench/raw_keypoint/std": 0.3415650255319866, + "rewards/wordcountpos_reward_GEOBench/raw_rule/mean": 0.7125, + "rewards/wordcountpos_reward_GEOBench/raw_rule/std": 0.09339283817414602, + "rewards/wordcountpos_reward_GEOBench/std": 1.0327955484390259, + "step": 3999 + } + ], + "logging_steps": 1, + "max_steps": 3999, + "num_input_tokens_seen": 181557285, + "num_train_epochs": 1, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}